% Conference and Journal Names
% Venue macros. Entries reference these names unquoted (e.g. booktitle=ICASSP),
% so every name defined here must stay defined. Acronyms sit inside brace groups
% so that bibliography styles cannot re-case them. Each macro is defined exactly
% once and every definition is closed by its own pair of braces.
@string{AAAI        = {Proc. {AAAI} Conference on Artificial Intelligence {(AAAI)}}}
@string{NAACL       = {Proc. Conference of the North American Chapter of the Association for Computational Linguistics {(NAACL)}}}
@string{RANLP       = {Proc. Recent Advances in Natural Language Processing {(RANLP)}}}
@string{IJCAI       = {Proc. International Joint Conference on Artificial Intelligence {(IJCAI)}}}
@string{CHI         = {Proc. Conference on Human Factors in Computing Systems {(CHI)}}}
@string{CIKM        = {Proc. {ACM} Conference on Information and Knowledge Management {(CIKM)}}}
@string{TREC        = {Proc. Text REtrieval Conference {(TREC)}}}
@string{COLT        = {Proc. Conference on Learning Theory {(COLT)}}}
@string{ECIR        = {Proc. European Conference on Information Retrieval {(ECIR)}}}
@string{ECML        = {Proc. European Conference on Machine Learning {(ECML)}}}
@string{ACL         = {Proc. Annual Meeting of the Association for Computational Linguistics {(ACL)}}}
@string{EMNLP       = {Proc. Empirical Methods in Natural Language Processing {(EMNLP)}}}
@string{CONLL       = {Proc. {ACL} Conference on Natural Language Learning {(CoNLL)}}}
@string{COLING      = {Proc. International Conference on Computational Linguistics {(COLING)}}}
@string{HLT         = {Proc. {NAACL} Conference on Human Language Technologies {(NAACL-HLT)}}}
@string{WHLT        = {Proc. {ACL} Workshop on Human Language Technologies {(ACL-HLT)}}}
@string{ICML        = {Proc. International Conference on Machine Learning {(ICML)}}}
@string{JAIR        = {Journal of Artificial Intelligence Research {(JAIR)}}}
@string{JMLR        = {Journal of Machine Learning Research {(JMLR)}}}
@string{MLJ         = {Machine Learning}}
@string{KDD         = {Proc. {ACM} Conference on Knowledge Discovery and Data Mining {(KDD)}}}
@string{NIPS        = {Proc. Neural Information Processing Systems {(NIPS)}}}
@string{PKDD        = {Proc. European Conference on Principles and Practice of Knowledge Discovery in Databases {(PKDD)}}}
@string{SIGIR       = {Proc. {ACM} Conference on Research and Development in Information Retrieval {(SIGIR)}}}
@string{TOIS        = {{ACM} Transactions on Information Systems {(TOIS)}}}
@string{UAI         = {Proc. Conference on Uncertainty in Artificial Intelligence {(UAI)}}}
@string{AISTATS     = {Proc. International Conference on Artificial Intelligence and Statistics {(AISTATS)}}}
@string{UM          = {Proc. International Conference on User Modeling {(UM)}}}
@string{VLDB        = {Proc. International Conference on Very Large Data Bases {(VLDB)}}}
@string{WWW         = {Proc. World Wide Web Conference {(WWW)}}}
@string{CVPR        = {Proc. {IEEE} Conference on Computer Vision and Pattern Recognition {(CVPR)}}}
@string{ICPR        = {Proc. {IAPR} International Conference on Pattern Recognition {(ICPR)}}}
@string{SDM         = {Proc. {SIAM} Conference on Data Mining {(SDM)}}}
@string{RECOMB      = {Proc. International Conference on Research in Computational Molecular Biology {(RECOMB)}}}
@string{SODA        = {Proc. {ACM-SIAM} Symposium on Discrete Algorithms {(SODA)}}}
@string{STOC        = {Proc. {ACM} Symposium on Theory of Computing {(STOC)}}}
@string{FOCS        = {Proc. {IEEE} Symposium on Foundations of Computer Science {(FOCS)}}}
@string{ICALP       = {Proc. International Colloquium on Automata, Languages and Programming {(ICALP)}}}
@string{ICANN       = {Proc. International Conference on Artificial Neural Networks {(ICANN)}}}
@string{JOC         = {{SIAM} Journal on Computing}}
@string{LREC        = {Proc. International Conference on Language Resources and Evaluation {(LREC)}}}
@string{ECAI        = {Proc. European Conference on Artificial Intelligence {(ECAI)}}}
@string{LexSem      = {Lexical Semantics}}
@string{ICASSP      = {Proc. International Conference on Acoustics, Speech and Signal Processing {(ICASSP)}}}
@string{INTERSPEECH = {Proc. Annual Conference of the International Speech Communication Association {(INTERSPEECH)}}}
@string{ASRU        = {Proc. {IEEE} Workshop on Automatic Speech Recognition and Understanding {(ASRU)}}}
@string{SLT         = {Proc. {IEEE} Spoken Language Technology Workshop {(SLT)}}}
@string{CSL         = {Computer Speech and Language}}
@string{ICSLP       = {Proc. International Conference on Spoken Language Processing {(ICSLP)}}}
@string{EUROSPEECH  = {Proc. European Conference on Speech Communication and Technology {(EUROSPEECH)}}}
@string{TransASLP   = {{IEEE} Transactions on Audio, Speech and Language Processing}}
@string{TransSAP    = {{IEEE} Transactions on Speech and Audio Processing}}
@string{SemEval     = {Proc. International Workshop on Semantic Evaluations}}
@string{ICSC        = {Proc. {IEEE} International Conference on Semantic Computing {(ICSC)}}}
@string{PAMI        = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence {(PAMI)}}}
@string{SPM         = {{IEEE} Signal Processing Magazine}}
@string{ICLR        = {Proc. International Conference on Learning Representations {(ICLR)}}}
@string{DARPASNLW   = {Proc. {DARPA} Speech and Natural Language Workshop}}
@string{ICCV        = {Proc. {IEEE} International Conference on Computer Vision {(ICCV)}}}
@string{IJCNN       = {Proc. International Joint Conference on Neural Networks {(IJCNN)}}}
@string{TransKDE    = {{IEEE} Transactions on Knowledge and Data Engineering}}
@string{JASA        = {The Journal of the Acoustical Society of America {(JASA)}}}
@string{ICSP        = {Proc. {IEEE} International Conference on Signal Processing {(ICSP)}}}
@string{TOG         = {{ACM} Transactions on Graphics {(TOG)}}}

% Atlas, Homma and Marks: early convolution-style network for phoneme
% classification. Published in the NIPS proceedings, so the venue goes in booktitle.
@inproceedings{CNN-atlas1988,
  author    = {Atlas, Les E and Homma, Toshiteru and Marks II, Robert J},
  title     = {An artificial neural network for spatio-temporal bipolar patterns: Application to phoneme classification},
  booktitle = NIPS,
  pages     = {31--40},
  year      = {1988},
}

% Textbook cited for strongly connected components (per the citation key).
% NOTE(review): author list and publisher were completed from the book's
% standard bibliographic record; confirm against the copy actually cited.
@book{StronglyConnectedComponents-Hopcroft+1983,
  author    = {Aho, Alfred V. and Hopcroft, John E. and Ullman, Jeffrey D.},
  title     = {Data Structures and Algorithms},
  publisher = {Addison-Wesley},
  year      = {1983},
}

% Tarjan (1972), depth-first search paper. The journal uses the JOC macro so
% its spelling matches the macro table.
@article{StronglyConnectedComponents-Tarjan-1972,
  author    = {Tarjan, Robert},
  title     = {Depth-first search and linear graph algorithms},
  journal   = JOC,
  volume    = {1},
  number    = {2},
  pages     = {146--160},
  year      = {1972},
  publisher = {SIAM},
}

% Wiesler et al., ICASSP 2014: the RASR/NN toolkit paper. The title is stored
% in title case with only the acronyms protected, so styles decide the casing.
@inproceedings{RASR-NN-RWTH-Toolkit-Wiesler+2014,
  author    = {Wiesler, Simon and Richard, Alexander and Golik, Pavel and Schl{\"u}ter, Ralf and Ney, Hermann},
  title     = {{RASR/NN}: The {RWTH} Neural Network Toolkit for Speech Recognition},
  booktitle = ICASSP,
  pages     = {3305--3309},
  year      = {2014},
}

% Bischof, Roh and Mauer-Oats: the ADIC automatic differentiation tool.
% NOTE(review): journal, volume and pages replace an auto-export artefact
% ("Urbana", 51, 61802 looks like a postal address); confirm against the publisher record.
@article{AutomaticDifferentiation-bischof+1997,
  author  = {Bischof, Christian and Roh, Lucas and Mauer-Oats, Andrew},
  title   = {{ADIC}: an extensible automatic differentiation tool for {ANSI-C}},
  journal = {Software: Practice and Experience},
  volume  = {27},
  number  = {12},
  pages   = {1427--1456},
  year    = {1997},
}

% Griewank and Walther, book on algorithmic differentiation. The publisher
% acronym is braced so it keeps its capitals.
@book{EvaluatingDerivatives-griewank+2008,
  author    = {Griewank, Andreas and Walther, Andrea},
  title     = {Evaluating derivatives: principles and techniques of algorithmic differentiation},
  publisher = {{SIAM}},
  year      = {2008},
}

% Szegedy et al. (2013), arXiv preprint.
@article{IntriguingPropertiesOfDNN-szegedy+2013,
  author  = {Szegedy, Christian and Zaremba, Wojciech and Sutskever, Ilya and Bruna, Joan and Erhan, Dumitru and Goodfellow, Ian and Fergus, Rob},
  title   = {Intriguing properties of neural networks},
  journal = {arXiv preprint arXiv:1312.6199},
  year    = {2013},
}

% Yu et al. (2014), introduction to computational networks and CNTK.
% This is a technical report, so it carries institution/number, not a journal.
% NOTE(review): confirm the report number against the published report.
@techreport{CNTK-Intro-YU+2014,
  author      = {Yu, Dong and Eversole, Adam and Seltzer, Mike and Yao, Kaisheng and Guenter, Brian and Kuchaiev, Oleksii and Seide, Frank and Wang, Huaming and Droppo, Jasha and Huang, Zhiheng and Zweig, Geoffrey and Rossbach, Chris and Currey, Jon},
  title       = {An Introduction to Computational Networks and the Computational Network Toolkit},
  institution = {Microsoft Research},
  number      = {MSR-TR-2014-112},
  year        = {2014},
}


% Bergstra et al. (2010), the Theano paper (SciPy conference).
@inproceedings{Theano-bergstra+2010,
  author    = {Bergstra, James and Breuleux, Olivier and Bastien, Fr{\'e}d{\'e}ric and Lamblin, Pascal and Pascanu, Razvan and Desjardins, Guillaume and Turian, Joseph and Warde-Farley, David and Bengio, Yoshua},
  title     = {Theano: a {CPU} and {GPU} math expression compiler},
  booktitle = {Proceedings of the Python for scientific computing conference (SciPy)},
  volume    = {4},
  year      = {2010},
}

% Guenter (2007). TOG is a journal macro (Transactions on Graphics), so the
% entry is an article with volume and number.
@article{SymbolicDifferentiation-guenter2007,
  author  = {Guenter, Brian},
  title   = {Efficient symbolic differentiation for graphics applications},
  journal = TOG,
  volume  = {26},
  number  = {3},
  pages   = {108},
  year    = {2007},
}

% Sainath et al., ICASSP 2013.
@inproceedings{DCNN-LVCSR-sainath+2013,
  author    = {Sainath, Tara N and Mohamed, Abdel-rahman and Kingsbury, Brian and Ramabhadran, Bhuvana},
  title     = {Deep convolutional neural networks for {LVCSR}},
  booktitle = ICASSP,
  pages     = {8614--8618},
  year      = {2013},
}

% Sainath et al., ASRU 2013. The acronym in the title is brace-protected so
% sentence-casing styles cannot lowercase it.
@inproceedings{CNN-LVCSR-sainath+2013,
  author    = {Sainath, Tara N and Kingsbury, Brian and Mohamed, Abdel-rahman and Dahl, George E and Saon, George and Soltau, Hagen and Beran, Tomas and Aravkin, Aleksandr Y and Ramabhadran, Bhuvana},
  title     = {Improvements to deep convolutional neural networks for {LVCSR}},
  booktitle = ASRU,
  pages     = {315--320},
  year      = {2013},
}

% Krizhevsky, Sutskever and Hinton, NIPS 2012. Uses the NIPS macro like the
% other entries.
% NOTE(review): page range replaces an auto-export placeholder; confirm against the proceedings.
@inproceedings{DCNN-ImageNet-krizhevsky+2012,
  author    = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E},
  title     = {{ImageNet} Classification with Deep Convolutional Neural Networks},
  booktitle = NIPS,
  pages     = {1097--1105},
  year      = {2012},
}

% Kavukcuoglu et al., NIPS 2010. Uses the NIPS macro like the other entries.
% NOTE(review): page range replaces an auto-export placeholder; confirm against the proceedings.
@inproceedings{CNN-FeatureHierarchy-kavukcuoglu+2010,
  author    = {Kavukcuoglu, Koray and Sermanet, Pierre and Boureau, Y-Lan and Gregor, Karol and Mathieu, Micha{\"e}l and LeCun, Yann},
  title     = {Learning Convolutional Feature Hierarchies for Visual Recognition},
  booktitle = NIPS,
  pages     = {1090--1098},
  year      = {2010},
}

% Deng, Abdel-Hamid and Yu, ICASSP 2013.
@inproceedings{CNN-ASR-deng+2013,
  author    = {Deng, Li and Abdel-Hamid, Ossama and Yu, Dong},
  title     = {A deep convolutional neural network using heterogeneous pooling for trading acoustic invariance with phonetic confusion},
  booktitle = ICASSP,
  pages     = {6669--6673},
  year      = {2013},
}

% Abdel-Hamid et al., ICASSP 2012. The acronym in the title is brace-protected.
@inproceedings{CNN-ASR-abdel+2012,
  author       = {Abdel-Hamid, Ossama and Mohamed, Abdel-rahman and Jiang, Hui and Penn, Gerald},
  title        = {Applying convolutional neural networks concepts to hybrid {NN-HMM} model for speech recognition},
  booktitle    = ICASSP,
  pages        = {4277--4280},
  year         = {2012},
  organization = {IEEE},
}

% Abdel-Hamid, Deng and Yu, INTERSPEECH 2013. The venue is given as a
% booktitle, which only a proceedings-paper entry type prints.
@inproceedings{CNN-ASR-Abdel+2013,
  author    = {Abdel-Hamid, Ossama and Deng, Li and Yu, Dong},
  title     = {Exploring Convolutional Neural Network Structures and Optimization Techniques for Speech Recognition},
  booktitle = INTERSPEECH,
  pages     = {3366--3370},
  year      = {2013},
}

% LeCun and Bengio (1995), a chapter in an edited handbook, so the handbook
% title is the booktitle and the publisher city is the address.
% NOTE(review): editor added from the handbook's bibliographic record; confirm.
@incollection{CNN-lecun:1995,
  author    = {LeCun, Yann and Bengio, Yoshua},
  title     = {Convolutional networks for images, speech, and time series},
  booktitle = {The Handbook of Brain Theory and Neural Networks},
  editor    = {Arbib, Michael A.},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {1995},
}

% Lang, Waibel and Hinton (1990), time-delay neural network paper.
% Journal names are printed verbatim by most styles, so the name is capitalised.
@article{TDNN-lang+1990,
  author  = {Lang, Kevin J and Waibel, Alex H and Hinton, Geoffrey E},
  title   = {A time-delay neural network architecture for isolated word recognition},
  journal = {Neural Networks},
  volume  = {3},
  number  = {1},
  pages   = {23--43},
  year    = {1990},
}

% Chellapilla, Puri and Simard (2006), handwriting recognition workshop paper.
@inproceedings{CNN-FastComputing-chellapilla+2006,
  author    = {Chellapilla, Kumar and Puri, Sidd and Simard, Patrice},
  title     = {High performance convolutional neural networks for document processing},
  booktitle = {Tenth International Workshop on Frontiers in Handwriting Recognition},
  year      = {2006},
}

% Hochreiter and Schmidhuber (1997), the LSTM paper.
% Journal names are printed verbatim by most styles, so the name is capitalised.
@article{LSTM-hochreiter:1997,
  author    = {Hochreiter, Sepp and Schmidhuber, J{\"u}rgen},
  title     = {Long short-term memory},
  journal   = {Neural Computation},
  volume    = {9},
  number    = {8},
  pages     = {1735--1780},
  year      = {1997},
  publisher = {MIT Press},
}

% Graves (2013), arXiv preprint.
@article{LSTM-GenSequence-Graves-2013,
  author  = {Graves, Alex},
  title   = {Generating sequences with recurrent neural networks},
  journal = {arXiv preprint arXiv:1308.0850},
  year    = {2013},
}

% Zhang et al., ICASSP 2014. Every author is in "Last, First" form; without the
% comma BibTeX would take "Li-Rong" as the surname of the last author.
@inproceedings{DropoutPretrain-Zhang+2014,
  author    = {Zhang, Shiliang and Bao, Yebo and Zhou, Pan and Jiang, Hui and Dai, Li-Rong},
  title     = {Improving deep neural networks for {LVCSR} using dropout and shrinking structure},
  booktitle = ICASSP,
  pages     = {6899--6903},
  year      = {2014},
}

% Senior et al., ICASSP 2014.
@inproceedings{GMMFreeDNNTraining-Senior+2014,
  author    = {Senior, Andrew  and Heigold, Georg  and  Bacchiani, Michiel  and  Liao, Hank},
  title     = {{GMM}-free {DNN} training},
  booktitle = ICASSP,
  year      = {2014},
}

% Aradilla, Bourlard and Magimai-Doss, INTERSPEECH 2008.
@inproceedings{KL-HMM-Aradilla+2008,
  author    = {Aradilla, Guillermo and Bourlard, Herv{\'e} and Magimai-Doss, Mathew},
  title     = {Using {KL}-based acoustic models in a large vocabulary recognition task},
  booktitle = INTERSPEECH,
  pages     = {928--931},
  year      = {2008},
}

% Aradilla, Vepa and Bourlard, ICASSP 2007. The proper names in the title are
% brace-protected, following the file's {M}arkov convention.
@inproceedings{KL-HMM-LVCSR-Aradilla+2007,
  author    = {Aradilla, Guillermo and Vepa, Jithendra and Bourlard, Herv{\'e}},
  title     = {An acoustic model based on {K}ullback-{L}eibler divergence for posterior features},
  booktitle = ICASSP,
  volume    = {4},
  pages     = {IV-657},
  year      = {2007},
}

% Xue et al., ICASSP 2014.
@inproceedings{DNNAdapt-SpeakerCode-Xue+2014,
  author    = {Xue, Shaofei and Abdel-Hamid, Ossama and Jiang, Hui and Dai, Lirong},
  title     = {Direct Adaptation Of Hybrid {DNN/HMM} Model For Fast Speaker Adaptation In {LVCSR} Based On Speaker Code},
  booktitle = ICASSP,
  pages     = {6389--6393},
  year      = {2014},
}

% Hastie, Tibshirani and Friedman (2009). Each author is listed once.
% NOTE(review): the earlier volume=2 is read as the second edition; confirm.
@book{ElementsOfStatisticalLearning-hastie+2009,
  author    = {Hastie, Trevor and Tibshirani, Robert and Friedman, Jerome},
  title     = {The elements of statistical learning},
  edition   = {Second},
  publisher = {Springer},
  year      = {2009},
}

% Mnih and Teh (2012), arXiv preprint.
@article{LM-NoiseContrastiveEstimation-mnih:2012,
  author  = {Mnih, Andriy and Teh, Yee Whye},
  title   = {A fast and simple algorithm for training neural probabilistic language models},
  journal = {arXiv preprint arXiv:1206.6426},
  year    = {2012},
}

% Gutmann and Hyvarinen (2012). Uses the JMLR macro so the journal name matches
% the macro table; the stray space in the publisher name is removed.
@article{NoiseContrastiveEstimation-gutmann:2012,
  author    = {Gutmann, Michael U and Hyv{\"a}rinen, Aapo},
  title     = {Noise-contrastive estimation of unnormalized statistical models, with applications to natural image statistics},
  journal   = JMLR,
  volume    = {13},
  pages     = {307--361},
  year      = {2012},
  publisher = {JMLR.org},
}

% Gutmann and Hyvarinen (2010). Uses the AISTATS macro instead of a second,
% hand-typed spelling of the same venue.
@inproceedings{NoiseContrastiveEstimation-gutmann:2010,
  author    = {Gutmann, Michael and Hyv{\"a}rinen, Aapo},
  title     = {Noise-contrastive estimation: A new estimation principle for unnormalized statistical models},
  booktitle = AISTATS,
  pages     = {297--304},
  year      = {2010},
}

% Bryson and Ho (1969). The surname is "Bryson"; "Earl" is a given name and so
% belongs after the comma.
@book{OptimalControl-bryson:1969,
  author    = {Bryson, Arthur Earl and Ho, Yu-Chi},
  title     = {Applied optimal control: optimization, estimation, and control},
  publisher = {Blaisdell Publishing Company},
  year      = {1969},
}

% Zhou et al., ICSP 2012.
@inproceedings{Multistream-zhou+2012,
  author    = {Zhou, Pan and Dai, Lirong and Liu, Qingfeng and Jiang, Hui},
  title     = {Combining information from multi-stream features using deep neural network in speech recognition},
  booktitle = ICSP,
  volume    = {1},
  pages     = {557--561},
  year      = {2012},
}

% Allen (1994), journal article (TransSAP macro).
@article{HumanSR-allen-1994,
  author  = {Allen, Jont B},
  title   = {How do humans process and recognize speech?},
  journal = TransSAP,
  volume  = {2},
  number  = {4},
  pages   = {567--577},
  year    = {1994},
}

% Bourlard and others (1999), workshop paper. The acronym in the title is
% brace-protected.
@inproceedings{MultiStreamASR-bourlard-1999,
  author    = {Bourlard, Herv{\'e} and others},
  title     = {Non-stationary multi-channel (multi-stream) processing towards robust and adaptive {ASR}},
  booktitle = {Proc. Workshop on Robust Methods for Speech Recognition in Adverse Conditions},
  pages     = {1--10},
  year      = {1999},
}

% Bourlard, Dupont and Ris (1996). The place name that had been merged into the
% third author's name is now the address field.
% NOTE(review): institution and report number are taken from how this report is
% usually cited; confirm against the report itself.
@techreport{MultiStreamASR-bourlard+1996,
  author      = {Bourlard, Herv{\'e} and Dupont, St{\'e}phane and Ris, Christophe},
  title       = {Multi Stream Speech Recognition},
  institution = {IDIAP},
  address     = {Martigny, Valais, Switzerland},
  number      = {IDIAP-RR 96-07},
  year        = {1996},
}

% Ngiam et al., ICML 2011. Uses the ICML macro instead of a second, hand-typed
% spelling of the same venue.
@inproceedings{MultimodalDeepLearning-ngiam+2011,
  author    = {Ngiam, Jiquan and Khosla, Aditya and Kim, Mingyu and Nam, Juhan and Lee, Honglak and Ng, Andrew Y},
  title     = {Multimodal deep learning},
  booktitle = ICML,
  pages     = {689--696},
  year      = {2011},
}

% Kim, Ryu and Kim (2005), chapter in a Springer collection.
@incollection{AudioVisual-NN-kim+2005,
  author    = {Kim, Myung Won and Ryu, Joung Woo and Kim, Eun Ju},
  title     = {Speech recognition by integrating audio, visual and contextual features based on neural networks},
  booktitle = {Advances in Natural Computation},
  pages     = {155--164},
  publisher = {Springer},
  year      = {2005},
}

% Lewis and Powers (2003), journal article.
@article{AudioVisual-NN-lewis:2003,
  author  = {Lewis, Trent W and Powers, David MW},
  title   = {Audio-visual speech recognition using red exclusion and neural networks},
  journal = {Journal of Research and Practice in Information Technology},
  volume  = {35},
  number  = {1},
  pages   = {41--64},
  year    = {2003},
}

% Potamianos et al. (2003), Proceedings of the IEEE.
@article{AudioVisual-Review-potamianos+2003,
  author  = {Potamianos, Gerasimos and Neti, Chalapathy and Gravier, Guillaume and Garg, Ashutosh and Senior, Andrew W},
  title   = {Recent advances in the automatic recognition of audiovisual speech},
  journal = {Proceedings of the IEEE},
  volume  = {91},
  number  = {9},
  pages   = {1306--1326},
  year    = {2003},
}

% Chibelushi, Deravi and Mason (2002). The journal name is written in reading
% order rather than the inverted catalogue form.
@article{AudioVisual-Review-chibelushi+2002,
  author  = {Chibelushi, Claude C and Deravi, Farzin and Mason, John SD},
  title   = {A review of speech-based bimodal recognition},
  journal = {{IEEE} Transactions on Multimedia},
  volume  = {4},
  number  = {1},
  pages   = {23--37},
  year    = {2002},
}

% Dupont and Luettin (2000). The journal name is written in reading order
% rather than the inverted catalogue form.
@article{AudioVisual-dupont:2000,
  author  = {Dupont, St{\'e}phane and Luettin, Juergen},
  title   = {Audio-visual speech modeling for continuous speech recognition},
  journal = {{IEEE} Transactions on Multimedia},
  volume  = {2},
  number  = {3},
  pages   = {141--151},
  year    = {2000},
}

% Chen and Rao (1998), Proceedings of the IEEE.
@article{AudioVisual-chen:1998,
  author  = {Chen, Tsuhan and Rao, Ram R},
  title   = {Audio-visual integration in multimodal communication},
  journal = {Proceedings of the IEEE},
  volume  = {86},
  number  = {5},
  pages   = {837--852},
  year    = {1998},
}

% Sumby and Pollack (1954), journal article (JASA macro).
@article{VisualHelpsSpeech-sumby:1954,
  author  = {Sumby, William H and Pollack, Irwin},
  title   = {Visual contribution to speech intelligibility in noise},
  journal = JASA,
  volume  = {26},
  number  = {2},
  pages   = {212--215},
  year    = {1954},
}

% Lee and Hon (1989), speaker-independent phone recognition with HMMs.
% NOTE(review): journal name set to the Acoustics, Speech, and Signal Processing
% transactions, which matches volume 37 in 1989; confirm against the paper.
@article{TIMIT-lee:1989,
  author  = {Lee, Kai-Fu and Hon, Hsiao-Wuen},
  title   = {Speaker-independent phone recognition using hidden {M}arkov models},
  journal = {{IEEE} Transactions on Acoustics, Speech, and Signal Processing},
  volume  = {37},
  number  = {11},
  pages   = {1641--1648},
  year    = {1989},
}

% Garofolo (1993), the TIMIT corpus CD-ROM. Acronyms are brace-protected.
@book{TIMIT-Corps-garofolo-1993,
  author    = {Garofolo, John S},
  title     = {{DARPA} {TIMIT}: Acoustic-phonetic Continuous Speech Corpus {CD-ROM}},
  publisher = {US Department of Commerce, National Institute of Standards and Technology},
  year      = {1993},
}

% Huang and Kingsbury, ICASSP 2013.
@inproceedings{audio-visual-Huang:2013,
  author    = {Huang, Jing and Kingsbury, Brian},
  title     = {Audio-visual deep learning for noise robust speech recognition},
  booktitle = ICASSP,
  pages     = {7596--7599},
  year      = {2013},
}

% Pan and Yang (2010), survey article (TransKDE macro).
@article{TransferLearning-Survey-Pan:2010,
  author  = {Pan, Sinno Jialin and Yang, Qiang},
  title   = {A survey on transfer learning},
  journal = TransKDE,
  volume  = {22},
  number  = {10},
  pages   = {1345--1359},
  year    = {2010},
}

% Chen et al., ICASSP 2014.
@inproceedings{multitask-DNN-Grapheme-Chen+2014,
  author    = {Chen, Dongpeng and  Mak, Brian and  Leung, Cheung-Chi and  Sivadas, Sunil},
  title     = {Joint Acoustic Modeling of Triphones and Trigraphemes by Multi-Task Learning Deep Neural Networks for Low-Resource Speech Recognition},
  booktitle = ICASSP,
  year      = {2014},
}

% Seltzer and Droppo, ICASSP 2013.
@inproceedings{multitask-DNN-Seltzer:2013,
  author    = {Seltzer, Michael L and Droppo, Jasha},
  title     = {Multi-task learning in deep neural networks for improved phoneme recognition},
  booktitle = ICASSP,
  pages     = {6965--6969},
  year      = {2013},
}

% Huang et al., INTERSPEECH 2014. Proper names in the title are brace-protected
% so sentence-casing styles keep their capitals.
@inproceedings{GMM-DNN-Compare-Huang+2014,
  author    = {Huang, Yan and Yu, Dong and Liu, Chaojun and Gong, Yifan},
  title     = {A Comparative Analytic Study On The {G}aussian Mixture and Context Dependent Deep Neural Network Hidden {M}arkov Models},
  booktitle = INTERSPEECH,
  year      = {2014},
}

% Seide et al., ICASSP 2014. The acronym in the title is brace-protected.
@inproceedings{SGD-Parallel-Seide+2014,
  author    = {Seide, Frank and Fu, Hao and Droppo, Jasha and Li, Gang and Yu, Dong},
  title     = {On Parallelizability of Stochastic Gradient Descent for Speech {DNNs}},
  booktitle = ICASSP,
  year      = {2014},
}

% Zeiler and Fergus (2013), arXiv preprint.
@article{VisualizeCNN-Zeiler:2013,
  author  = {Zeiler, Matthew D and Fergus, Rob},
  title   = {Visualizing and Understanding Convolutional Neural Networks},
  journal = {arXiv preprint arXiv:1311.2901},
  year    = {2013},
}

% Moon (1996), magazine article (SPM macro).
@article{EM-Moon1996,
  author  = {Moon, Todd K},
  title   = {The expectation-maximization algorithm},
  journal = SPM,
  volume  = {13},
  number  = {6},
  pages   = {47--60},
  year    = {1996},
}

% Juang, Chou and Lee (1997), minimum classification error methods.
% NOTE(review): second author corrected from "Hou, Wu" to "Chou, Wu"; confirm
% against the paper.
@article{MCE-Juang+1997,
  author  = {Juang, Biing-Hwang and Chou, Wu and Lee, Chin-Hui},
  title   = {Minimum classification error rate methods for speech recognition},
  journal = TransSAP,
  volume  = {5},
  number  = {3},
  pages   = {257--265},
  year    = {1997},
}

% Rabiner and Juang (1986). "Markov" is brace-protected like the other titles
% in this file.
@article{HMM-Rabiner:1986,
  author  = {Rabiner, Lawrence and Juang, Biing-Hwang},
  title   = {An introduction to hidden {M}arkov models},
  journal = {IEEE ASSP Magazine},
  volume  = {3},
  number  = {1},
  pages   = {4--16},
  year    = {1986},
}

% Huang, Acero and Hon (2001), textbook. Publisher and city are separate fields.
% NOTE(review): the trailing "and others" and volume=18 looked like auto-export
% artefacts and were dropped; the city follows the book's usual record. Confirm.
@book{SpokenLanguageProcessing-Huang+2001,
  author    = {Huang, Xuedong and Acero, Alex and Hon, Hsiao-Wuen},
  title     = {Spoken language processing},
  publisher = {Prentice Hall},
  address   = {Upper Saddle River, NJ},
  year      = {2001},
}

% Seltzer et al. (2011), magazine article (SPM macro).
@article{InCarMediaSearch-Seltzer+2011,
  author  = {Seltzer, Michael L and Ju, Yun-Cheng and Tashev, Ivan and Wang, Ye-Yi and Yu, Dong},
  title   = {In-car media search},
  journal = SPM,
  volume  = {28},
  number  = {4},
  pages   = {50--60},
  year    = {2011},
}

% Wang et al. (2008), magazine article (SPM macro).
@article{VoiceSearch-Wang+2008,
  author  = {Wang, Ye-Yi and Yu, Dong and Ju, Yun-Cheng and Acero, Alex},
  title   = {An introduction to voice search},
  journal = SPM,
  volume  = {25},
  number  = {3},
  pages   = {28--38},
  year    = {2008},
}

% Clayton (2012), blog post referenced by URL.
@misc{MSR-S2S-Clayton2012,
  author = {Clayton, Steve},
  title  = {Microsoft Research shows a promising new breakthrough in speech translation technology},
  year   = {2012},
  url    = {http://blogs.technet.com/b/next/archive/2012/11/08/microsoft-research-shows-a-promising-new-breakthrough-in-speech-translation-technology.aspx},
}


% Brummer (2009), Agnitio Labs technical report. The old journal field mixed a
% broken URL with the report series name.
% NOTE(review): the URL is reconstructed from the fragments in that field; confirm it.
@techreport{EM-MinimumDivergence-Brummer2009,
  author      = {Br{\"u}mmer, Niko},
  title       = {The {EM} algorithm and minimum divergence},
  institution = {Agnitio Labs},
  year        = {2009},
  note        = {Online: http://niko.brummer.googlepages.com},
}

% Kenny (2005), CRIM report on joint factor analysis. Institution, city and
% report number are split out of the old combined journal field.
@techreport{JointFactorAnalysis-Kenny2005,
  author      = {Kenny, Patrick},
  title       = {Joint factor analysis of speaker and session variability: Theory and algorithms},
  institution = {CRIM},
  address     = {Montreal},
  number      = {CRIM-06/08-13},
  year        = {2005},
}

% Bacchiani, ICASSP 2013.
@inproceedings{i-vector-rapidAdapt-Bacchiani2013,
  author    = {Bacchiani, Michiel},
  title     = {Rapid adaptation for mobile speech applications},
  booktitle = ICASSP,
  pages     = {7903--7907},
  year      = {2013},
}

% Yao, Gong and Liu, INTERSPEECH 2012. The title carries no final period;
% styles add their own punctuation.
@inproceedings{i-vector-cluster-Yao+2012,
  author    = {Yao, Kaisheng and Gong, Yifan and Liu, Chaojun},
  title     = {A Feature Space Transformation Method for Personalization using Generalized {I-Vector} Clustering},
  booktitle = INTERSPEECH,
  year      = {2012},
}

% Karafiat et al., ASRU 2011. Author names carry their diacritics as BibTeX
% special characters so sorting and labels work.
% NOTE(review): diacritics and the last author's given name were completed from
% the authors' usual spellings; confirm.
@inproceedings{GMMAdapt-i-vector-Karafiat+2011,
  author    = {Karafi{\'a}t, Martin and Burget, Luk{\'a}{\v{s}} and Mat{\v{e}}jka, Pavel and Glembek, Ond{\v{r}}ej and {\v{C}}ernock{\'y}, Jan},
  title     = {{iVector}-based discriminative adaptation for automatic speech recognition},
  booktitle = ASRU,
  pages     = {152--157},
  year      = {2011},
}

% Glembek et al., ICASSP 2011. Author names carry their diacritics as BibTeX
% special characters.
% NOTE(review): diacritics were completed from the authors' usual spellings; confirm.
@inproceedings{I-vector-Simplification-Glembek+2011,
  author    = {Glembek, Ond{\v{r}}ej and Burget, Luk{\'a}{\v{s}} and Mat{\v{e}}jka, Pavel and Karafi{\'a}t, Martin and Kenny, Patrick},
  title     = {Simplification and optimization of i-vector extraction},
  booktitle = ICASSP,
  pages     = {4516--4519},
  year      = {2011},
}

% Dehak et al. (2011), journal article (TransASLP macro).
@article{I-Vector-Dehak+2011,
  author  = {Dehak, Najim and Kenny, Patrick and Dehak, R{\'e}da and Dumouchel, Pierre and Ouellet, Pierre},
  title   = {Front-end factor analysis for speaker verification},
  journal = TransASLP,
  volume  = {19},
  number  = {4},
  pages   = {788--798},
  year    = {2011},
}

% Abdel-Hamid and Jiang, ICASSP 2013.
@inproceedings{DNNAdapt-SpeakerCode-Ossama:2013,
  author    = {Abdel-Hamid, Ossama and Jiang, Hui},
  title     = {Fast speaker adaptation of hybrid {NN/HMM} model for speech recognition based on discriminative learning of speaker code},
  booktitle = ICASSP,
  pages     = {7942--7946},
  year      = {2013},
}

% Xue et al., ICASSP 2014.
@inproceedings{LowFootPrintDNNAdaptation-Xue+2014,
  author    = {Xue, Jian and Li, Jinyu and Yu, Dong and Seltzer, Mike and Gong, Yifan},
  title     = {Singular Value Decomposition Based Low-footprint Speaker Adaptation and Personalization for Deep Neural Network},
  booktitle = ICASSP,
  year      = {2014},
}

% Nallasamy et al., ASRU 2013. The acronym in the title is brace-protected.
@inproceedings{NeighborSelection4Adaptation-Nallasamy+2013,
  author    = {Nallasamy, Udhyakumar and Fuhs, Mark and Woszczyna, Monika and Metze, Florian and Schultz, Tanja},
  title     = {Neighbour selection and adaptation for rapid speaker-dependent {ASR}},
  booktitle = ASRU,
  pages     = {60--65},
  year      = {2013},
}

% Saon et al., ASRU 2013.
@inproceedings{i-Vector-SpeakerAdapt-Saon+2013,
  author    = {Saon, George and Soltau, Hagen and Nahamoo, David and Picheny, Michael},
  title     = {Speaker adaptation of neural network acoustic models using i-vectors},
  booktitle = ASRU,
  pages     = {55--59},
  year      = {2013},
}

% Trmal, Zelinka and Muller, Interspeech 2010. Surnames come first, matching the
% LIN-Trmal+2010 entry by the same authors; the venue uses the INTERSPEECH macro.
@inproceedings{ANN-SpeakerAdaptiveTraining-Jan+2010,
  author    = {Trmal, Jan and Zelinka, Jan and M{\"u}ller, Lud{\v{e}}k},
  title     = {On speaker adaptive training of artificial neural networks},
  booktitle = INTERSPEECH,
  year      = {2010},
}

% Snoek, Larochelle and Adams (2012), arXiv preprint.
@article{Hyper-Parameter-Search-Snoek+2012,
  author  = {Snoek, Jasper and Larochelle, Hugo and Adams, Ryan P},
  title   = {Practical Bayesian optimization of machine learning algorithms},
  journal = {arXiv preprint arXiv:1206.2944},
  year    = {2012},
}

% Yu, Deng and Seide (2013). An article needs its venue in the journal field
% (booktitle is ignored for this entry type); the page range uses a double hyphen.
@article{DTNN-Yu+2013,
  author  = {Yu, Dong and Deng, Li and Seide, Frank},
  title   = {The deep tensor neural network with applications to large vocabulary speech recognition},
  journal = TransASLP,
  volume  = {21},
  number  = {3},
  pages   = {388--396},
  year    = {2013},
}

% Yu, Chen and Deng (2012), workshop paper.
@inproceedings{Factorized-DNN-Adapt-Yu+2012,
  author    = {Yu, Dong and Chen, Xin and Deng, Li},
  title     = {Factorized deep neural networks for adaptive speech recognition},
  booktitle = {Proc. Int. Workshop on Statistical Machine Learning for Speech Processing},
  year      = {2012},
}

% Dupont and Cheboub, ICASSP 2000.
@inproceedings{PCA-Subspace-Adapt-Dupont:2000,
  author    = {Dupont, St{\'e}phane and Cheboub, Leila},
  title     = {Fast speaker adaptation of artificial neural networks for automatic speech recognition},
  booktitle = ICASSP,
  volume    = {3},
  pages     = {1795--1798},
  year      = {2000},
}

% Li and Bilmes, ICASSP 2006. Uses the ICASSP macro like the other ICASSP
% entries.
% NOTE(review): the former pages value "I--I" was a placeholder, not a page
% range, and was dropped; add the real pages when known.
@inproceedings{Conservative-Adapt-Li:2006,
  author       = {Li, Xiao and Bilmes, Jeff},
  title        = {Regularized adaptation of discriminative classifiers},
  booktitle    = ICASSP,
  volume       = {1},
  year         = {2006},
  organization = {IEEE},
}

% Albesano et al., IJCNN 2006.
@inproceedings{ConservativeAdapt-Albesano+2006,
  author    = {Albesano, Dario and Gemello, Roberto and Laface, Pietro and Mana, Franco and Scanzio, Stefano},
  title     = {Adaptation of Artificial Neural Networks Avoiding Catastrophic Forgetting},
  booktitle = IJCNN,
  pages     = {1554--1561},
  year      = {2006},
}

% Stadermann and Rigoll, ICASSP 2005.
@inproceedings{ConservativeAdaptSelectedWeights-Stadermann:2005,
  author    = {Stadermann, Jan and Rigoll, Gerhard},
  title     = {Two-stage speaker adaptation of hybrid tied-posterior acoustic models},
  booktitle = ICASSP,
  year      = {2005},
}

% Yao et al., SLT 2012.
@inproceedings{LON-oDLR-Yao+2012,
  author    = {Yao, Kaisheng and Yu, Dong and Seide, Frank and Su, Hang and Deng, Li and Gong, Yifan},
  title     = {Adaptation of context-dependent deep neural networks for automatic speech recognition},
  booktitle = SLT,
  pages     = {366--369},
  year      = {2012},
}

% Trmal, Zelinka and Muller (2010), in "Text, Speech and Dialogue".
@inproceedings{LIN-Trmal+2010,
  author       = {Trmal, Jan and Zelinka, Jan and M{\"u}ller, Lud{\v{e}}k},
  title        = {Adaptation of a feedforward artificial neural network using a linear transform},
  booktitle    = {Text, Speech and Dialogue},
  pages        = {423--430},
  year         = {2010},
  organization = {Springer},
}

% Xiao et al., INTERSPEECH 2012. The title reads "An Initial" and carries no
% final period.
@inproceedings{LIN-Xiao+2012,
  author    = {Xiao, Yeming and Zhang, Zhen and Cai, Shang and Pan, Jielin and Yan, Yonghong},
  title     = {An Initial Attempt on Task-Specific Adaptation for Deep Neural Network-based Large Vocabulary Continuous Speech Recognition},
  booktitle = INTERSPEECH,
  year      = {2012},
}

% Gemello et al. (2007), Speech Communication.
@article{LHN-Gemello+2007,
  author  = {Gemello, Roberto and Mana, Franco and Scanzio, Stefano and Laface, Pietro and De Mori, Renato},
  title   = {Linear hidden transformations for adaptation of hybrid {ANN/HMM} models},
  journal = {Speech Communication},
  volume  = {49},
  number  = {10},
  pages   = {827--835},
  year    = {2007},
}

% Li and Sim, INTERSPEECH 2010. The title carries no final period; styles add
% their own punctuation.
@inproceedings{Compare-LIN-LON-Li:2010,
  author    = {Li, Bo and Sim, Khe Chai},
  title     = {Comparison of discriminative input and output transformations for speaker adaptation in the hybrid {NN/HMM} systems},
  booktitle = INTERSPEECH,
  pages     = {526--529},
  year      = {2010},
}

% Albesano, Gemello and Mana (2000), Information Sciences.
@article{LIN-Albesano+2000,
  author  = {Albesano, Dario and Gemello, Roberto and Mana, Franco},
  title   = {Hybrid {HMM-NN} modeling of stationary--transitional units for continuous speech recognition},
  journal = {Information Sciences},
  volume  = {123},
  number  = {1},
  pages   = {3--11},
  year    = {2000},
}

% Neto et al., EUROSPEECH 1995. The venue is given as a booktitle, which only a
% proceedings-paper entry type prints; the page range uses a double hyphen.
@inproceedings{LIN-Neto+1995,
  author    = {Neto, Joao and Almeida, Lu{\'\i}s and Hochberg, Mike and Martins, Ciro and Nunes, Lu{\'\i}s and Renals, Steve and Robinson, Tony},
  title     = {Speaker-adaptation for hybrid {HMM-ANN} continuous speech recognition system},
  booktitle = EUROSPEECH,
  pages     = {2171--2174},
  year      = {1995},
}

% Abrash et al., EUROSPEECH 1995.
@inproceedings{LIN-Abrash+1995,
  author    = {Abrash, Victor and Franco, Horacio and Sankar, Ananth and Cohen, Michael},
  title     = {Connectionist speaker normalization and adaptation},
  booktitle = EUROSPEECH,
  year      = {1995},
}


% Franco et al. (1994), journal article (CSL macro).
@article{CD-ANN-HMM-Franco+1994,
  author  = {Franco, Horacio and Cohen, Michael and Morgan, Nelson and Rumelhart, David and Abrash, Victor},
  title   = {Context-dependent connectionist probability estimation in a hybrid hidden {M}arkov model-neural net speech recognition system},
  journal = CSL,
  volume  = {8},
  number  = {3},
  pages   = {211--222},
  year    = {1994},
}

% Chesta, Siohan and Lee, Eurospeech 1999. Uses the EUROSPEECH macro like the
% other Eurospeech entries; "Markov" is brace-protected and the title carries
% no final period.
@inproceedings{MAPLR-Chesta+1999,
  author    = {Chesta, Cristina and Siohan, Olivier and Lee, Chin-Hui},
  title     = {Maximum a posteriori linear regression for hidden {M}arkov model adaptation},
  booktitle = EUROSPEECH,
  year      = {1999},
}

% Leggetter and Woodland (1995), journal article (CSL macro).
@article{MLLR-Leggetter:1995,
  author  = {Leggetter, Christopher J and Woodland, PC},
  title   = {Maximum likelihood linear regression for speaker adaptation of continuous density hidden {M}arkov models},
  journal = CSL,
  volume  = {9},
  number  = {2},
  pages   = {171--185},
  year    = {1995},
}

% Lee and Huo (2000), Proceedings of the IEEE.
@article{Adaptation-Lee:2000,
  author  = {Lee, Chin-Hui and Huo, Qiang},
  title   = {On adaptive decision rules and decision parameter adaptation for automatic speech recognition},
  journal = {Proceedings of the IEEE},
  volume  = {88},
  number  = {8},
  pages   = {1241--1269},
  year    = {2000},
}

% Li et al. (2009), journal article (CSL macro).
@article{JointCompensation-Li+2009,
  author  = {Li, Jinyu and Deng, Li and Yu, Dong and Gong, Yifan and Acero, Alex},
  title   = {A unified framework of {HMM} adaptation with joint compensation of additive and convolutive distortions},
  journal = CSL,
  volume  = {23},
  number  = {3},
  pages   = {389--405},
  year    = {2009},
}

% Ciresan, Meier and Schmidhuber, IJCNN 2012.
@inproceedings{TransferLearning-DNN-Ciresan+2012,
  author    = {Ciresan, Dan Claudiu and Meier, Ueli and Schmidhuber, J{\"u}rgen},
  title     = {Transfer learning for {L}atin and {C}hinese characters with deep neural networks},
  booktitle = IJCNN,
  pages     = {1--6},
  year      = {2012},
}

% Lu et al. (2004). Names are in "Last, First" form, the title is in title case
% so styles decide the casing, and the month uses the bare dec macro.
@inproceedings{Multitask-MLP-ASR-Lu+2004,
  author    = {Lu, Youyi and Lu, Fei and Sehgal, Siddharth and Gupta, Swati and Du, Jingsheng and Tham, Chee Hong and Green, Phil and Wan, Vincent},
  title     = {Multitask Learning in Connectionist Speech Recognition},
  booktitle = {Proc. Australian International Conference on Speech Science and Technology},
  month     = dec,
  year      = {2004},
}

% Caruana (1997). Uses the MLJ macro so the journal name matches the macro table.
@article{MultitaskLearning-Caruana1997,
  author  = {Caruana, Rich},
  title   = {Multitask learning},
  journal = MLJ,
  volume  = {28},
  number  = {1},
  pages   = {41--75},
  year    = {1997},
}

% Report on the 1989 Kiel convention. The corporate author is double-braced so
% BibTeX treats it as one name instead of splitting it into first/last parts.
% NOTE(review): a trailing "and others" was dropped as an export artefact; confirm.
@article{IPA-1989,
  author  = {{International Phonetic Association}},
  title   = {Report on the 1989 {K}iel convention},
  journal = {Journal of the International Phonetic Association},
  volume  = {19},
  number  = {2},
  pages   = {67--80},
  year    = {1989},
}

% Hermansky (1990), the PLP paper.
% NOTE(review): issue number and end page were added from the journal record;
% confirm against the paper.
@article{PLP-Hermansky1990,
  author  = {Hermansky, Hynek},
  title   = {Perceptual linear predictive ({PLP}) analysis of speech},
  journal = {The Journal of the Acoustical Society of America},
  volume  = {87},
  number  = {4},
  pages   = {1738--1752},
  year    = {1990},
}

% Athineos and Ellis, ASRU 2003.
@inproceedings{FDLP-Athineos:2003,
  author    = {Athineos, Marios and Ellis, Daniel PW},
  title     = {Frequency-domain linear prediction for temporal features},
  booktitle = ASRU,
  pages     = {261--266},
  year      = {2003},
}

@inproceedings{Crosslingual-HMM-Yu+2009,
  author    = {Yu, Dong and Deng, Li and Liu, Peng and Wu, Jian and Gong, Yifan and Acero, Alex},
  title     = {Cross-lingual speech recognition under runtime resource constraints},
  booktitle = ICASSP,
  pages     = {4193--4196},
  year      = {2009},
}

@inproceedings{Multilingual-HMM-Lin+2009,
  author    = {Lin, Hui and Deng, Li and Yu, Dong and Gong, Yifan and Acero, Alex and Lee, Chin-Hui},
  title     = {A study on multilingual acoustic modeling for large vocabulary {ASR}},
  booktitle = ICASSP,
  pages     = {4333--4336},
  year      = {2009},
}

@inproceedings{MultilingualAndCrosslingual-Schultz:1998,
  author    = {Schultz, Tanja and Waibel, Alex},
  title     = {Multilingual and crosslingual speech recognition},
  booktitle = {Proc. {DARPA} Workshop on Broadcast News Transcription and Understanding},
  pages     = {259--262},
  year      = {1998},
}

@inproceedings{MLP-Crosslingual-Tandem-Thomas+2012,
  author    = {Thomas, Samuel and Ganapathy, Sriram and Hermansky, Hynek},
  title     = {Multilingual {MLP} features for low-resource {LVCSR} systems},
  booktitle = ICASSP,
  pages     = {4269--4272},
  year      = {2012},
}

@inproceedings{MLP-Crosslingual-Tandem-Qian:2012,
  author    = {Qian, Yanmin and Liu, Jia},
  title     = {Cross-Lingual and Ensemble {MLP}s Strategies for Low-Resource Speech Recognition},
  booktitle = INTERSPEECH,
  year      = {2012},
}

@inproceedings{MLP-Crosslingual-Tandem-Plahl+2011,
  author    = {Plahl, Christian and Schl{\"u}ter, Ralf and Ney, Hermann},
  title     = {Cross-lingual portability of {C}hinese and {E}nglish neural network features for {F}rench and {G}erman {LVCSR}},
  booktitle = ASRU,
  pages     = {371--376},
  year      = {2011},
}

@inproceedings{MLP-Crosslingual-Tandem-Thomas+2010,
  author    = {Thomas, Samuel and Ganapathy, Sriram and Hermansky, Hynek},
  title     = {Cross-lingual and multi-stream posterior features for low resource {LVCSR} systems},
  booktitle = INTERSPEECH,
  pages     = {877--880},
  year      = {2010},
}

% The venue belongs in booktitle (required for inproceedings), not organization.
@inproceedings{MultilingualDNN-Ghoshal+2013,
  author    = {Ghoshal, Arnab and Swietojanski, Pawel and Renals, Steve},
  title     = {Multilingual training of deep neural networks},
  booktitle = ICASSP,
  year      = {2013},
}

% The venue belongs in booktitle (required for inproceedings), not organization.
@inproceedings{MultilingualDNN-Heigold+2013,
  author    = {Heigold, Georg and Vanhoucke, Vincent and Senior, Andrew and Nguyen, Patrick and Ranzato, Marc'Aurelio and Devin, Matthieu and Dean, Jeffrey},
  title     = {Multilingual acoustic models using distributed deep neural networks},
  booktitle = ICASSP,
  year      = {2013},
}

@inproceedings{MultilingualDNN-Huang+2013,
  author    = {Huang, Jui-Ting and Li, Jinyu and Yu, Dong and Deng, Li and Gong, Yifan},
  title     = {Cross-language knowledge transfer using multilingual deep neural network with shared hidden layers},
  booktitle = ICASSP,
  year      = {2013},
}

@inproceedings{FastDropout-Wang:2013,
  author    = {Wang, Sida and Manning, Christopher},
  title     = {Fast dropout training},
  booktitle = ICML,
  pages     = {118--126},
  year      = {2013},
}

@article{MBR-Combine-Xu+2011,
  author  = {Xu, Haihua and Povey, Daniel and Mangu, Lidia and Zhu, Jie},
  title   = {Minimum {B}ayes Risk decoding and system combination based on a recursion for edit distance},
  journal = CSL,
  volume  = {25},
  number  = {4},
  pages   = {802--828},
  year    = {2011},
}

@inproceedings{DNN-FrameCombine-Swietojanski+2013,
  author    = {Swietojanski, Pawel and Ghoshal, Arnab and Renals, Steve},
  title     = {Revisiting Hybrid and {GMM-HMM} system combination techniques},
  booktitle = ICASSP,
  year      = {2013},
}

@inproceedings{SCARF-Zweig:2010,
  author    = {Zweig, Geoffrey and Nguyen, Patrick},
  title     = {{SCARF}: a segmental conditional random field toolkit for speech recognition},
  booktitle = INTERSPEECH,
  pages     = {2858--2861},
  year      = {2010},
}

% Same paper as DNN-LVSR-Jaitly+2012; both keys are kept so existing citations still resolve.
@inproceedings{DNN-LVSR-SCARF-Jaitly+2012,
  author    = {Jaitly, Navdeep and Nguyen, Patrick and Senior, Andrew W and Vanhoucke, Vincent},
  title     = {Application of Pretrained Deep Neural Networks to Large Vocabulary Speech Recognition},
  booktitle = INTERSPEECH,
  year      = {2012},
}

@inproceedings{ROVER-Fiscus1997,
  author    = {Fiscus, Jonathan G},
  title     = {A post-processing system to yield reduced word error rates: Recognizer output voting error reduction {(ROVER)}},
  booktitle = ASRU,
  pages     = {347--354},
  year      = {1997},
}

% Same paper as BottleneckDBN-Sainath+2012; both keys are kept so existing citations still resolve.
@inproceedings{Bottleneck-Autoencoder-Sainath+2012,
  author    = {Sainath, Tara N and Kingsbury, Brian and Ramabhadran, Bhuvana},
  title     = {Auto-encoder bottleneck features using deep belief networks},
  booktitle = ICASSP,
  pages     = {4153--4156},
  year      = {2012},
}

% Same paper as RegionDependentTransform-Zhang+2006; both keys are kept so existing citations still resolve.
% NOTE(review): the placeholder pages "I--I" were replaced with 313--316; confirm against the proceedings.
@inproceedings{RDLT-Zhang+2006,
  author    = {Zhang, Bing and Matsoukas, Spyros and Schwartz, Richard},
  title     = {Discriminatively trained region dependent feature transforms for speech recognition},
  booktitle = ICASSP,
  volume    = {1},
  pages     = {313--316},
  year      = {2006},
}

@inproceedings{RDLT-Yan+2013,
  author    = {Yan, Zhi-Jie and Huo, Qiang and Xu, Jian and Zhang, Yu},
  title     = {Tied-state based discriminative training of context-expanded region-dependent feature transforms for {LVCSR}},
  booktitle = ICASSP,
  pages     = {6940--6944},
  year      = {2013},
}

@inproceedings{DNNFeature-Yan+2013,
  author    = {Yan, Zhi-Jie and Huo, Qiang and Xu, Jian},
  title     = {A scalable approach to using {DNN}-derived features in {GMM-HMM} based acoustic modeling for {LVCSR}},
  booktitle = INTERSPEECH,
  year      = {2013},
}

@article{GMM-Loglinear-Equivalent-Heigold+2011,
  author  = {Heigold, Georg and Ney, Hermann and Lehnen, Patrick and Gass, Tobias and Schl{\"u}ter, Ralf},
  title   = {Equivalence of generative and log-linear models},
  journal = TransASLP,
  volume  = {19},
  number  = {5},
  pages   = {1138--1148},
  year    = {2011},
}

@inproceedings{DNN-Bottlenec-Yu:2011,
  author    = {Yu, Dong and Seltzer, Michael L},
  title     = {Improved Bottleneck Features Using Pretrained Deep Neural Networks},
  booktitle = INTERSPEECH,
  pages     = {237--240},
  year      = {2011},
}

@inproceedings{DirichletMixtureModel-Balakrishnan+2011,
  author    = {Balakrishnan, V and Sivaram, Garimella S. V. S. and Khudanpur, Sanjeev},
  title     = {{D}irichlet Mixture Models of neural net posteriors for {HMM}-based speech recognition},
  booktitle = ICASSP,
  pages     = {5028--5031},
  year      = {2011},
}

@inproceedings{MLPFeature-SRI-Arabic-Vergyri+2008,
  author    = {Vergyri, Dimitra and Mandal, Arindam and Wang, Wen and Stolcke, Andreas and Zheng, Jing and Graciarena, Martin and Rybach, David and Gollan, Christian and Schl{\"u}ter, Ralf and Kirchhoff, Katrin and others},
  title     = {Development of the {SRI}/{N}ightingale {A}rabic {ASR} system},
  booktitle = INTERSPEECH,
  pages     = {1437--1440},
  year      = {2008},
}

@inproceedings{MLPFeature-Fousek+2008,
  author    = {Petr Fousek and Lori Lamel and Jean-Luc Gauvain},
  title     = {Transcribing Broadcast Data Using {MLP} Features},
  booktitle = INTERSPEECH,
  pages     = {1433--1436},
  year      = {2008},
}

@inproceedings{Tandem-Zhu+2005,
  title     = {Using {MLP} features in {SRI}'s conversational speech recognition system},
  author    = {Qifeng Zhu and Andreas Stolcke and Barry Y. Chen and Nelson Morgan},
  booktitle = INTERSPEECH,
  pages     = {2141--2144},
  year      = {2005},
}

@inproceedings{Bottleneck-Valente+2010,
  author    = {Valente, Fabio and Doss, Mathew Magimai and Plahl, Christian and Ravuri, Suman and Wang, Wen},
  title     = {A Comparative Large Scale Study of {MLP} Features for Mandarin {ASR}},
  booktitle = INTERSPEECH,
  pages     = {2630--2633},
  year      = {2010},
}

% Accents are written as brace-wrapped special characters so BibTeX sorts and labels the names correctly.
@inproceedings{Bottleneck-Grezl+2007,
  author    = {Franti{\v{s}}ek Gr{\'e}zl and Martin Karafi{\'a}t and Stanislav Kont{\'a}r and Jan {\v{C}}ernock{\'y}},
  title     = {Probabilistic and bottle-neck features for {LVCSR} of meetings},
  booktitle = ICASSP,
  pages     = {757--760},
  year      = {2007},
}

@inproceedings{Bottleneck-GrezlFousek:2008,
  author    = {Franti{\v{s}}ek Gr{\'e}zl and Petr Fousek},
  title     = {Optimizing bottle-neck features for {LVCSR}},
  booktitle = ICASSP,
  pages     = {4729--4732},
  year      = {2008},
}

% ASCII apostrophe in "Marc'Aurelio": a Unicode quote garbles output under classic 8-bit BibTeX.
@inproceedings{MultiStageArch-Jarrett+2009,
  author    = {Jarrett, Kevin and Kavukcuoglu, Koray and Ranzato, Marc'Aurelio and LeCun, Yann},
  title     = {What is the best multi-stage architecture for object recognition?},
  booktitle = ICCV,
  pages     = {2146--2153},
  year      = {2009},
}

@inproceedings{UnderstandDNN4AM-Mohamed+2012,
  author    = {Mohamed, Abdel-rahman and Hinton, Geoffrey and Penn, Gerald},
  title     = {Understanding how deep belief networks perform acoustic modelling},
  booktitle = ICASSP,
  pages     = {4273--4276},
  year      = {2012},
}

@inproceedings{LearnFilterbankInDNN-Sainath+2013,
  author    = {Sainath, Tara N. and Kingsbury, Brian and Mohamed, Abdel-rahman and Ramabhadran, Bhuvana},
  title     = {Learning filter banks within a deep neural network framework},
  booktitle = ASRU,
  year      = {2013},
}

@inproceedings{DNN-MixedBandwidth-Li+2012,
  author    = {Li, Jinyu and Yu, Dong and Huang, Jui-Ting and Gong, Yifan},
  title     = {Improving wideband speech recognition using mixed-bandwidth training data in {CD-DNN-HMM}},
  booktitle = SLT,
  pages     = {131--136},
  year      = {2012},
}

@article{JointNAT-VTS-MLLR-Wang:2012,
  author  = {Wang, Yongqiang and Gales, Mark J. F.},
  title   = {Speaker and noise factorization for robust speech recognition},
  journal = TransASLP,
  volume  = {20},
  number  = {7},
  pages   = {2149--2158},
  year    = {2012},
}

@inproceedings{DerivativeKernel-Aurora4-ragni:2011,
  author    = {Ragni, Anton and Gales, Mark J. F.},
  title     = {Derivative kernels for noise robust {ASR}},
  booktitle = ASRU,
  pages     = {119--124},
  year      = {2011},
}

@inproceedings{NAT-VTS-Aurora4-Flego2009,
  author    = {Flego, Federico and Gales, Mark J. F.},
  title     = {Discriminative adaptive training with {VTS} and {JUD}},
  booktitle = ASRU,
  pages     = {170--175},
  year      = {2009},
}

@article{Dropout-Hinton+2012,
  author  = {Hinton, Geoffrey E and Srivastava, Nitish and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan R},
  title   = {Improving neural networks by preventing co-adaptation of feature detectors},
  journal = {arXiv preprint arXiv:1207.0580},
  year    = {2012},
}

% Uses the shared TransASLP macro (as other entries in this file do) instead of an inverted inline journal name.
@article{NAT-Kalinli+2010,
  author  = {Kalinli, Ozlem and Seltzer, Michael L and Droppo, Jasha and Acero, Alex},
  title   = {Noise adaptive training for robust automatic speech recognition},
  journal = TransASLP,
  volume  = {18},
  number  = {8},
  pages   = {1889--1901},
  year    = {2010},
}

@article{Aurora4-Parihar:2002,
  author  = {Parihar, N and Picone, J},
  title   = {Aurora working group: {DSR} front end {LVCSR} evaluation {AU}/384/02},
  journal = {Inst. for Signal and Information Process, Mississippi State University, Tech. Rep},
  year    = {2002},
}

@inproceedings{DNN-NoiseRobust-Seltzer+2013,
  author    = {Seltzer, Michael and Yu, Dong and Wang, Yongqiang},
  title     = {An investigation of deep neural networks for noise robust speech recognition},
  booktitle = ICASSP,
  year      = {2013},
}

@article{MLLR-Gales:1996,
  author  = {Gales, Mark J. F. and Woodland, P. C.},
  title   = {Mean and variance adaptation within the {MLLR} framework},
  journal = CSL,
  volume  = {10},
  number  = {4},
  pages   = {249--264},
  year    = {1996},
}

% Second author is Chong Kwan Un (family name "Un"); the export had split the name wrongly.
@article{VTS-Kim+1998,
  author  = {Kim, Do Yeong and Un, Chong Kwan and Kim, Nam Soo},
  title   = {Speech recognition in noisy environments using first-order vector {T}aylor series},
  journal = {Speech Communication},
  volume  = {24},
  number  = {1},
  pages   = {39--49},
  year    = {1998},
}

@inproceedings{VTS-Moreno+1996,
  author    = {Moreno, Pedro J and Raj, Bhiksha and Stern, Richard M},
  title     = {A vector {T}aylor series approach for environment-independent speech recognition},
  booktitle = ICASSP,
  volume    = {2},
  pages     = {733--736},
  year      = {1996},
}

@inproceedings{VTS-Phase-Li+2008,
  author    = {Li, Jinyu and Deng, Li and Yu, Dong and Gong, Yifan and Acero, Alex},
  title     = {{HMM} adaptation using a phase-sensitive acoustic distortion model for environment-robust speech recognition},
  booktitle = ICASSP,
  pages     = {4069--4072},
  year      = {2008},
}

@inproceedings{VTS-JointAdapt-Li+2007,
  author    = {Li, Jinyu and Deng, Li and Yu, Dong and Gong, Yifan and Acero, Alex},
  title     = {High-performance {HMM} adaptation with joint compensation of additive and convolutive distortions via vector {T}aylor series},
  booktitle = ASRU,
  pages     = {65--70},
  year      = {2007},
}

@inproceedings{acero2000hmm,
  author    = {Acero, Alex and Deng, Li and Kristjansson, Trausti T and Zhang, Jerry},
  title     = {{HMM} adaptation using vector {T}aylor series for noisy speech recognition},
  booktitle = INTERSPEECH,
  pages     = {869--872},
  year      = {2000},
}

@article{fMLLR-Gales1998,
  author  = {Gales, Mark J. F.},
  title   = {Maximum likelihood linear transformations for {HMM}-based speech recognition},
  journal = CSL,
  volume  = {12},
  number  = {2},
  pages   = {75--98},
  year    = {1998},
}

@inproceedings{VTLN-Andreou+1994,
  title     = {Experiments in Vocal Tract Normalization},
  author    = {Andreou, Andreas and Kamm, Theresa and Cohen, Jordan},
  booktitle = {Proc. the CAIP Workshop:  Frontiers in Speech Recognition II},
  year      = {1994},
}

@article{StochasticPoolReg-DCNN-Zeiler:2013,
  author  = {Zeiler, Matthew D and Fergus, Rob},
  title   = {Stochastic pooling for regularization of deep convolutional neural networks},
  journal = {arXiv preprint arXiv:1301.3557},
  year    = {2013},
}

@article{MaxEntModel-Parkhi1997,
  author  = {Ratnaparkhi, Adwait},
  title   = {A simple introduction to maximum entropy models for natural language processing},
  journal = {IRCS Technical Reports Series},
  pages   = {81},
  year    = {1997},
}

@article{MFCC-Davis:1980,
  author    = {Davis, Steven and Mermelstein, Paul},
  title     = {Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences},
  journal   = {{IEEE} Transactions on Acoustics, Speech, and Signal Processing},
  volume    = {28},
  number    = {4},
  pages     = {357--366},
  year      = {1980},
  publisher = {IEEE},
}

% Uses the shared ICCV macro (as another entry in this file does) instead of an inverted, hard-coded venue name.
@inproceedings{SIFT-Lowe1999,
  author       = {Lowe, David G},
  title        = {Object recognition from local scale-invariant features},
  booktitle    = ICCV,
  volume       = {2},
  pages        = {1150--1157},
  year         = {1999},
  organization = {IEEE},
}

% Conference paper: it has a booktitle and no journal, so the entry type is inproceedings.
@inproceedings{Sequence-ANN-HMM-Hennebert+1997,
  author    = {Hennebert, Jean and Ris, Christophe and Bourlard, Herv{\'e} and Renals, Steve and Morgan, Nelson},
  title     = {Estimation of global posteriors and forward-backward training of hybrid {HMM/ANN} systems},
  booktitle = EUROSPEECH,
  year      = {1997},
}

% This is a book; series, publisher and city were previously packed into the journal field.
@book{LagrangeMultiplierSummary-Bertsekas1982,
  author    = {Bertsekas, Dimitri P},
  title     = {Constrained optimization and {L}agrange multiplier methods},
  series    = {Computer Science and Applied Mathematics},
  publisher = {Academic Press},
  address   = {Boston},
  year      = {1982},
}

@book{LagrangeMultiplierProposed-Powell1967,
  author    = {Powell, Michael J. D.},
  title     = {A method for non-linear constraints in minimization problems},
  publisher = {UKAEA},
  year      = {1967},
}

@article{LagrangeMultiplierProposed-Hestenes1969,
  author    = {Hestenes, Magnus R},
  title     = {Multiplier and gradient methods},
  journal   = {Journal of Optimization Theory and Applications},
  volume    = {4},
  number    = {5},
  pages     = {303--320},
  year      = {1969},
  publisher = {Springer},
}

@article{ADMM-Boyd+2011,
  author    = {Boyd, Stephen and Parikh, Neal and Chu, Eric and Peleato, Borja and Eckstein, Jonathan},
  title     = {Distributed optimization and statistical learning via the alternating direction method of multipliers},
  journal   = {Foundations and Trends{\textregistered} in Machine Learning},
  volume    = {3},
  number    = {1},
  pages     = {1--122},
  year      = {2011},
  publisher = {Now Publishers Inc.},
}

@inproceedings{KLD-DNN-Adaptation-Yu+2013,
  author    = {Yu, Dong and Yao, Kaisheng and Su, Hang and Li, Gang and Seide, Frank},
  title     = {{KL}-divergence regularized deep neural network adaptation for improved large vocabulary speech recognition},
  booktitle = ICASSP,
  pages     = {7893--7897},
  year      = {2013},
}

@article{MBR-Goel:2000,
  author  = {Goel, Vaibhava and Byrne, William J},
  title   = {Minimum {B}ayes-risk automatic speech recognition},
  journal = CSL,
  volume  = {14},
  number  = {2},
  pages   = {115--135},
  year    = {2000},
}

% The surname is spelled with an acute accent on the y.
@inproceedings{SequenceDNN-Vesely+2013,
  author    = {Vesel{\'y}, Karel and Ghoshal, Arnab and Burget, Luk{\'a}{\v{s}} and Povey, Daniel},
  title     = {Sequence-discriminative training of deep neural networks},
  booktitle = INTERSPEECH,
  year      = {2013},
}

@phdthesis{Thesis-Povey2003,
  title  = {Discriminative training for large vocabulary speech recognition},
  author = {Dan Povey},
  school = {Cambridge University Engineering Dept},
  year   = {2003},
}

@inproceedings{SequenceTrainNN-sMBR-Kingsbury2009,
  author    = {Kingsbury, Brian},
  title     = {Lattice-based optimization of sequence classification criteria for neural-network acoustic modeling},
  booktitle = ICASSP,
  pages     = {3761--3764},
  year      = {2009},
}

% Same paper as Bottleneck-Autoencoder-Sainath+2012; both keys are kept so existing citations still resolve.
@inproceedings{BottleneckDBN-Sainath+2012,
  author    = {Sainath, Tara N and Kingsbury, Brian and Ramabhadran, Bhuvana},
  title     = {Auto-encoder bottleneck features using deep belief networks},
  booktitle = ICASSP,
  pages     = {4153--4156},
  year      = {2012},
}

@inproceedings{DNN4LVSR-Sainath+2011,
  author    = {Sainath, Tara N and Kingsbury, Brian and Ramabhadran, Bhuvana and Fousek, Petr and Novak, Petr and Mohamed, Abdel-rahman},
  title     = {Making deep belief networks effective for large vocabulary continuous speech recognition},
  booktitle = ASRU,
  pages     = {30--35},
  year      = {2011},
}

@inproceedings{SequenceTrainDNN-Mohamed+2010,
  author    = {Abdel-rahman Mohamed and Dong Yu and Li Deng},
  title     = {Investigation of Full-Sequence Training of Deep Belief Networks for Speech Recognition},
  booktitle = INTERSPEECH,
  pages     = {2846--2849},
  year      = {2010},
}

% Fixes a missing field separator after author (parse error) and a year that contradicted the key.
@inproceedings{MultiFrameDNN-Vanhoucke+2013,
  author    = {Vanhoucke, Vincent and Devin, Matthieu and Heigold, Georg},
  title     = {Multiframe Deep Neural Networks for Acoustic Modeling},
  booktitle = ICASSP,
  year      = {2013},
}

% NIPS is a proceedings (the NIPS macro expands to "Proc. ..."), so this is an inproceedings entry.
@inproceedings{Sparse-SecondOrderPrune-Hassibi+1993,
  author    = {Hassibi, Babak and Stork, David G},
  title     = {Second order derivatives for network pruning: Optimal brain surgeon},
  booktitle = NIPS,
  pages     = {164--171},
  year      = {1993},
}

@inproceedings{Sparse-BrainDamage-Lecun+1989,
  author    = {LeCun, Yann and Denker, John S and Solla, Sara A and Howard, Richard E and Jackel, Lawrence D},
  title     = {Optimal brain damage},
  booktitle = NIPS,
  volume    = {2},
  pages     = {598--605},
  year      = {1989},
}

@article{SparseTruncatedGradient-Langford+2009,
  author  = {Langford, John and Li, Lihong and Zhang, Tong},
  title   = {Sparse online learning via truncated gradient},
  journal = JMLR,
  volume  = {10},
  pages   = {777--801},
  year    = {2009},
}

% Workshop venue is built from the NIPS macro, matching the other entry for this same workshop series.
@inproceedings{DNNSpeedup-Vanhoucke+2011,
  author    = {Vanhoucke, Vincent and Senior, Andrew and Mao, Mark Z},
  title     = {Improving the speed of neural networks on {CPU}s},
  booktitle = NIPS # { Workshop on Deep Learning and Unsupervised Feature Learning},
  year      = {2011},
}

@inproceedings{HessianFree-Martens2010,
  author    = {James Martens},
  title     = {Deep learning via {H}essian-free optimization},
  booktitle = ICML,
  pages     = {735--742},
  month     = jun,
  year      = {2010},
}

@inproceedings{RNN-HessianFree-Martens:2011,
  author    = {Martens, James and Sutskever, Ilya},
  title     = {Learning recurrent neural networks with {H}essian-free optimization},
  booktitle = ICML,
  pages     = {1033--1040},
  year      = {2011},
}

@inproceedings{HessianFreeDNN-Sequence-Kingsbury+2012,
  author    = {Kingsbury, Brian and Sainath, Tara N and Soltau, Hagen},
  title     = {Scalable Minimum {B}ayes Risk Training of Deep Neural Network Acoustic Models Using Distributed {H}essian-free Optimization},
  booktitle = INTERSPEECH,
  year      = {2012},
}

@inproceedings{ClusteredDNNTrainining-Zhou+2013,
  author    = {Zhou, Pan and Liu, Cong and Liu, Qingfeng and Dai, Lirong and Jiang, Hui},
  title     = {A cluster-based multiple deep neural networks method for large vocabulary continuous speech recognition},
  booktitle = ICASSP,
  pages     = {6650--6654},
  year      = {2013},
}


@inproceedings{SparseDNN-Yu+2012,
  author    = {D. Yu and F. Seide and G. Li and L. Deng},
  title     = {Exploiting sparseness in deep neural networks for large vocabulary speech recognition},
  booktitle = ICASSP,
  pages     = {4409--4412},
  year      = {2012},
}

@inproceedings{LowRank-Xue+2013,
  author    = {Xue, Jian and Li, Jinyu and Gong, Yifan},
  title     = {Restructuring of deep neural network acoustic models with singular value decomposition},
  booktitle = INTERSPEECH,
  year      = {2013},
}

@inproceedings{LowRank-Sainath+2013,
  author    = {Sainath, Tara N and Kingsbury, Brian and Sindhwani, Vikas and Arisoy, Ebru and Ramabhadran, Bhuvana},
  title     = {Low-rank matrix factorization for deep neural network training with high-dimensional output targets},
  booktitle = ICASSP,
  pages     = {6655--6659},
  year      = {2013},
}

@article{AsynchronousSGD-Hogwild-Niu+2011,
  author  = {Niu, Feng and Recht, Benjamin and R{\'e}, Christopher and Wright, Stephen J},
  title   = {Hogwild!: A lock-free approach to parallelizing stochastic gradient descent},
  journal = {arXiv preprint arXiv:1106.5730},
  year    = {2011},
}

@article{Unsupervise-AsynchronousSGD-Le+2011,
  author  = {Le, Quoc V and Ranzato, Marc'Aurelio and Monga, Rajat and Devin, Matthieu and Chen, Kai and Corrado, Greg S and Dean, Jeff and Ng, Andrew Y},
  title   = {Building high-level features using large scale unsupervised learning},
  journal = {arXiv preprint arXiv:1112.6209},
  year    = {2011},
}

@inproceedings{AsynchronousSGD-Zhang+2013,
  author    = {Zhang, Shanshan and Zhang, Ce and You, Zhao and Zheng, Rong and Xu, Bo},
  title     = {Asynchronous stochastic gradient descent for {DNN} training},
  booktitle = ICASSP,
  pages     = {6660--6663},
  year      = {2013},
}

@article{MapReduce-Dean:2008,
  author  = {Dean, Jeffrey and Ghemawat, Sanjay},
  title   = {{MapReduce}: simplified data processing on large clusters},
  journal = {Communications of the {ACM}},
  volume  = {51},
  number  = {1},
  pages   = {107--113},
  year    = {2008},
}

@article{PipelinedBP-Petrowski+1993,
  author  = {Petrowski, Alain and Dreyfus, Gerard and Girault, Claude},
  title   = {Performance analysis of a pipelined backpropagation parallel algorithm},
  journal = {{IEEE} Transactions on Neural Networks},
  volume  = {4},
  number  = {6},
  pages   = {970--981},
  year    = {1993},
}

@inproceedings{PipelinedBP-Chen+2012,
  author    = {Chen, Xie and Eversole, Adam and Li, Gang and Yu, Dong and Seide, Frank},
  title     = {Pipelined Back-Propagation for Context-Dependent Deep Neural Networks},
  booktitle = INTERSPEECH,
  year      = {2012},
}

@inproceedings{SWB-Godfrey+1992,
  author    = {Godfrey, John J and Holliman, Edward C and McDaniel, Jane},
  title     = {{SWITCHBOARD}: Telephone speech corpus for research and development},
  booktitle = ICASSP,
  volume    = {1},
  pages     = {517--520},
  year      = {1992},
}

% Same paper as DNN-LVSR-SCARF-Jaitly+2012; both keys are kept so existing citations still resolve.
@inproceedings{DNN-LVSR-Jaitly+2012,
  title     = {Application Of Pretrained Deep Neural Networks To Large Vocabulary Speech Recognition},
  author    = {Navdeep Jaitly and Patrick Nguyen and Andrew W. Senior and Vincent Vanhoucke},
  booktitle = INTERSPEECH,
  year      = {2012},
}

@inproceedings{DNN-SWB-seide+2011,
  author    = {Seide, Frank and Li, Gang and Yu, Dong},
  title     = {Conversational speech transcription using context-dependent deep neural networks},
  booktitle = INTERSPEECH,
  pages     = {437--440},
  year      = {2011},
}

@incollection{Tandem-Qifeng+2005,
  author    = {Zhu, Qifeng and Chen, Barry and Morgan, Nelson and Stolcke, Andreas},
  title     = {Tandem Connectionist Feature Extraction for Conversational Speech Recognition},
  booktitle = {Machine Learning for Multimodal Interaction},
  series    = {Lecture Notes in Computer Science},
  volume    = {3361},
  publisher = {Springer Berlin Heidelberg},
  pages     = {223--231},
  year      = {2005},
}

@inproceedings{Tandem-Hermansky+2000,
  author    = {Hermansky, Hynek and Ellis, Daniel P. W. and Sharma, Sangita},
  title     = {Tandem connectionist feature extraction for conventional {HMM} systems},
  booktitle = ICASSP,
  volume    = {3},
  pages     = {1635--1638},
  year      = {2000},
}

@inproceedings{BMMI-Povey+2008,
  author    = {Povey, Daniel and Kanevsky, Dimitri and Kingsbury, Brian and Ramabhadran, Bhuvana and Saon, George and Visweswariah, Karthik},
  title     = {Boosted {MMI} for model and feature-space discriminative training},
  booktitle = ICASSP,
  pages     = {4057--4060},
  year      = {2008},
}

% NOTE(review): page range completed from the truncated "I--105"; confirm against the proceedings.
@inproceedings{MPE-Povey:2002,
  author    = {Povey, Dan and Woodland, Phil C},
  title     = {Minimum phone error and {I}-smoothing for improved discriminative training},
  booktitle = ICASSP,
  volume    = {1},
  pages     = {I-105--I-108},
  year      = {2002},
}

@inproceedings{MMI-TIMIT-kapadia+1993,
  author    = {Kapadia, S and Valtchev, V and Young, S. J.},
  title     = {{MMI} training for continuous phoneme recognition on the {TIMIT} database},
  booktitle = ICASSP,
  volume    = {2},
  pages     = {491--494},
  year      = {1993},
}

@article{DNN-4Groups-Hinton+2012,
  author  = {Hinton, Geoffrey and Deng, Li and Yu, Dong and Dahl, George E and Mohamed, Abdel-rahman and Jaitly, Navdeep and Senior, Andrew and Vanhoucke, Vincent and Nguyen, Patrick and Sainath, Tara N and others},
  title   = {Deep neural networks for acoustic modeling in speech recognition: The shared views of four research groups},
  journal = SPM,
  volume  = {29},
  number  = {6},
  pages   = {82--97},
  year    = {2012},
}


@article{CD-DNN-HMM-Trans-Dahl+2012,
  author  = {Dahl, George E and Yu, Dong and Deng, Li and Acero, Alex},
  title   = {Context-dependent pre-trained deep neural networks for large-vocabulary speech recognition},
  journal = TransASLP,
  volume  = {20},
  number  = {1},
  pages   = {30--42},
  year    = {2012},
}

@techreport{RBM-PractialGuide-Hinton2010,
  author      = {G. Hinton},
  title       = {A practical guide to training restricted {B}oltzmann machines},
  institution = {University of Toronto},
  number      = {UTML TR 2010-003},
  year        = {2010},
}

@article{DBN-Hinton+2006,
  author  = {Geoffrey Hinton and Simon Osindero and Yee-Whye Teh},
  title   = {A fast learning algorithm for deep belief nets},
  journal = {Neural Computation},
  volume  = {18},
  number  = {7},
  pages   = {1527--1554},
  month   = jul,
  year    = {2006},
}

@inproceedings{NLP-DNN+Multitask-Collobert:2008,
  author    = {Collobert, Ronan and Weston, Jason},
  title     = {A unified architecture for natural language processing: Deep neural networks with multitask learning},
  booktitle = ICML,
  pages     = {160--167},
  year      = {2008},
}

@article{MeanField-Saul+1996,
  title   = {Mean Field Theory for Sigmoid Belief Networks},
  author  = {Saul, Lawrence K and Jaakkola, Tommi and Jordan, Michael I},
  journal = JAIR,
  volume  = {4},
  pages   = {61--76},
  year    = {1996},
}

@article{SWB-Release2-Godfrey:1997,
  author  = {Godfrey, John J and Holliman, Edward},
  title   = {Switchboard-1 Release 2},
  journal = {Linguistic Data Consortium},
  year    = {1997},
}


@inproceedings{MMI-Bahl+1986,
  author    = {Bahl, Lalit and Brown, P and De Souza, P and Mercer, R},
  title     = {Maximum mutual information estimation of hidden {M}arkov model parameters for speech recognition},
  booktitle = ICASSP,
  volume    = {11},
  pages     = {49--52},
  year      = {1986},
}

@article{ANN-HMM-BroadcastNews-Robinson+2002,
  author  = {Robinson, Anthony J and Cook, G. D. and Ellis, Daniel P. W. and Fosler-Lussier, Eric and Renals, S. J. and Williams, D. A. G.},
  title   = {Connectionist speech recognition of broadcast news},
  journal = {Speech Communication},
  volume  = {37},
  number  = {1},
  pages   = {27--45},
  year    = {2002},
}

@inproceedings{CD-NN-Bourlard+1992,
  author    = {Bourlard, Herv{\'e} and Morgan, Nelson and Wooters, Chuck and Renals, Steve},
  title     = {{CDNN}: A context dependent neural network for continuous speech recognition},
  booktitle = ICASSP,
  volume    = {2},
  pages     = {349--352},
  year      = {1992},
}

@inproceedings{ExtractFeatureWithAutoencoder-Vincent+2008,
  title     = {Extracting and Composing Robust Features with Denoising Autoencoders},
  author    = {Vincent, Pascal and Larochelle, Hugo and Bengio, Yoshua and Manzagol, Pierre-Antoine},
  booktitle = ICML,
  pages     = {1096--1103},
  year      = {2008},
}

@article{BackPropagation-rumelhart+1986,
  author  = {Rumelhart, David E and Hinton, Geoffrey E and Williams, Ronald J},
  title   = {Learning representations by back-propagating errors},
  journal = {Nature},
  volume  = {323},
  number  = {6088},
  pages   = {533--536},
  year    = {1986},
}

@techreport{ICSI-Tandem-TR,
  title       = {An Investigation of Tandem {MLP} Features for {ASR}},
  author      = {A. Faria},
  institution = {International Computer Science Institute},
  number      = {TR-07-003},
  url         = {http://www.icsi.berkeley.edu/pubs/techreports/faria\_icsitr.pdf},
  year        = {2007},
}

@inproceedings{ContinousSR-MLP-HMM-morgan:1990,
  author    = {Morgan, Nelson and Bourlard, Herv{\'e}},
  title     = {Continuous speech recognition using multilayer perceptrons with hidden {M}arkov models},
  booktitle = ICASSP,
  pages     = {413--416},
  year      = {1990},
}

@article{ANN-HMM-Survey-Trentin:2001,
  author  = {Trentin, Edmondo and Gori, Marco},
  title   = {A survey of hybrid {ANN/HMM} models for automatic speech recognition},
  journal = {Neurocomputing},
  volume  = {37},
  number  = {1},
  pages   = {91--126},
  year    = {2001},
}

@inproceedings{AutoencoderPretrain-Bengio+2006,
  title     = {Greedy Layer-Wise Training of Deep Networks},
  author    = {Bengio, Yoshua and Lamblin, Pascal and Popovici, Dan and Larochelle, Hugo},
  booktitle = NIPS,
  pages     = {153--160},
  year      = {2006},
}

@inproceedings{TrainDeepArchitectureDifficult-Erhan+2009,
  title     = {The Difficulty of Training Deep Architectures and the effect of Unsupervised Pre-Training},
  author    = {Erhan, Dumitru and Manzagol, Pierre-Antoine and Bengio, Yoshua and Bengio, Samy and Vincent, Pascal},
  booktitle = AISTATS,
  pages     = {153--160},
  year      = {2009},
}

@article{ContrastiveDivergence-Hinton2002,
  author  = {Hinton, Geoffrey E},
  title   = {Training products of experts by minimizing contrastive divergence},
  journal = {Neural Computation},
  volume  = {14},
  number  = {8},
  pages   = {1771--1800},
  year    = {2002},
}

@inproceedings{DeepCRF-Yu:2010,
  author    = {Yu, Dong and Deng, Li},
  title     = {Deep-structured hidden conditional random fields for phonetic recognition},
  booktitle = INTERSPEECH,
  pages     = {2986--2989},
  year      = {2010},
}

@inproceedings{HCRF-Yu+2009,
  author    = {Yu, Dong and Deng, Li and Acero, Alex},
  title     = {Hidden conditional random field with distribution constraints for phone classification},
  booktitle = INTERSPEECH,
  pages     = {676--679},
  year      = {2009},
}

@inproceedings{HCRF-Gunawardana+2005,
  author    = {Gunawardana, Asela and Mahajan, Milind and Acero, Alex and Platt, John C},
  title     = {Hidden conditional random fields for phone classification},
  booktitle = INTERSPEECH,
  pages     = {1117--1120},
  year      = {2005},
}

@article{VPHMM-Yu+2009,
  author  = {Yu, Dong and Deng, Li and Gong, Yifan and Acero, Alex},
  title   = {A novel framework and training algorithm for variable-parameter hidden {M}arkov models},
  journal = TransASLP,
  volume  = {17},
  number  = {7},
  pages   = {1348--1360},
  year    = {2009},
}

@inproceedings{IncreaseMargeInMCE-Yu+2006,
  author    = {Yu, Dong and Deng, Li and He, Xiaodong and Acero, Alex},
  title     = {Use of incrementally regulated discriminative margins in {MCE} training for speech recognition},
  booktitle = INTERSPEECH,
  year      = {2006},
}

% NOTE(review): page range completed from the truncated "IV--1137"; confirm against the proceedings.
@inproceedings{LargeMarginMCE4LargeScaleASR-Yu+2007,
  author    = {Yu, Dong and Deng, Li and He, Xiaodong and Acero, Alex},
  title     = {Large-margin minimum classification error training for large-scale speech recognition tasks},
  booktitle = ICASSP,
  volume    = {4},
  pages     = {IV-1137--IV-1140},
  year      = {2007},
}

@article{LargeMarginMCE-RiskMinPerspect-Yu+2008,
  author  = {Yu, Dong and Deng, Li and He, Xiaodong and Acero, Alex},
  title   = {Large-margin minimum classification error training: A theoretical risk minimization perspective},
  journal = CSL,
  volume  = {22},
  number  = {4},
  pages   = {415--429},
  year    = {2008},
}

@inproceedings{DirectoryAssistance-Yu+2007,
  author    = {Yu, Dong and Ju, Yun-Cheng and Wang, Ye-Yi and Zweig, Geoffrey and Acero, Alex},
  title     = {Automated directory assistance system-from theory to practice},
  booktitle = INTERSPEECH,
  pages     = {2709--2712},
  year      = {2007},
}

@inproceedings{SCRF4LVSR-Zweig:2009,
  author    = {Zweig, Geoffrey and Nguyen, Patrick},
  title     = {A segmental {CRF} approach to large vocabulary continuous speech recognition},
  booktitle = ASRU,
  pages     = {152--157},
  year      = {2009},
}

@inproceedings{LVSR-CD-DBN-HMM-Dahl+2011,
  author    = {Dahl, George E. and Yu, Dong and Deng, Li and Acero, Alex},
  title     = {Large vocabulary continuous speech recognition with context-dependent {DBN-HMM}s},
  booktitle = ICASSP,
  pages     = {4688--4691},
  year      = {2011},
}

@inproceedings{RolesOfPretrainInCD-DNN-HMM-Yu+2010,
  author    = {Yu, Dong and Deng, Li and Dahl, George},
  title     = {Roles of pre-training and fine-tuning in context-dependent {DBN-HMM}s for real-world speech recognition},
  booktitle = NIPS # { Workshop on Deep Learning and Unsupervised Feature Learning},
  year      = {2010},
}
@inproceedings{RegionDependentTransform-Zhang+2006,
  author    = {Zhang, Bing and Matsoukas, Spyros and Schwartz, Richard},
  title     = {Discriminatively trained region dependent feature transforms for speech recognition},
  booktitle = ICASSP,
  volume    = {1},
  year      = {2006},
}

@article{HLDA-Kumar:1998,
  author  = {Kumar, Nagendra and Andreou, Andreas G.},
  title   = {Heteroscedastic discriminant analysis and reduced rank {HMM}s for improved speech recognition},
  journal = {Speech Communication},
  volume  = {26},
  number  = {4},
  pages   = {283--297},
  year    = {1998},
}

@inproceedings{fMPE-Povey+2005,
  author    = {Povey, Daniel and Kingsbury, Brian and Mangu, Lidia and Saon, George and Soltau, Hagen and Zweig, Geoffrey},
  title     = {{fMPE}: Discriminatively trained features for speech recognition},
  booktitle = ICASSP,
  volume    = {1},
  pages     = {961--964},
  year      = {2005},
}

@inproceedings{SequenceTraining-Su+2013,
  author    = {Su, Hang and Li, Gang and Yu, Dong and Seide, Frank},
  title     = {Error back propagation for sequence training of context-dependent deep networks for conversational speech transcription},
  booktitle = ICASSP,
  year      = {2013},
}

@article{NN4ASR-Morgan:1995,
  author  = {Morgan, Nelson and Bourlard, Herv{\'e} A.},
  title   = {Neural networks for statistical recognition of continuous speech},
  journal = {Proceedings of the IEEE},
  volume  = {83},
  number  = {5},
  pages   = {742--772},
  year    = {1995},
}

@article{LinksBetweenHMMAndMLP-Bourlard:1990,
  author  = {Bourlard, Herv{\'e} and Wellekens, Christian J.},
  title   = {Links between {Markov} models and multilayer perceptrons},
  journal = PAMI,
  volume  = {12},
  number  = {12},
  pages   = {1167--1178},
  year    = {1990},
}

@article{WhyPretrainingHelps-Erhan+2010,
  author  = {Erhan, Dumitru and Bengio, Yoshua and Courville, Aaron and Manzagol, Pierre-Antoine and Vincent, Pascal and Bengio, Samy},
  title   = {Why does unsupervised pre-training help deep learning?},
  journal = JMLR,
  volume  = {11},
  pages   = {625--660},
  year    = {2010},
}

@inproceedings{DiscriminativeRBM-Larochelle:2008,
  author    = {Larochelle, Hugo and Bengio, Yoshua},
  title     = {Classification using discriminative restricted {B}oltzmann machines},
  booktitle = ICML,
  pages     = {536--543},
  year      = {2008},
}

@inproceedings{HybridPretrain-Sainath+2012,
  author    = {Sainath, Tara N. and Kingsbury, Brian and Ramabhadran, Bhuvana},
  title     = {Improving training time of deep belief networks through hybrid pre-training and larger batch sizes},
  booktitle = NIPS # { Workshop on Log-linear Models},
  year      = {2012},
}

@article{Wake-Sleep-Hinton+1995,
  author  = {Hinton, Geoffrey E. and Dayan, Peter and Frey, Brendan J. and Neal, Radford M.},
  title   = {The ``wake-sleep'' algorithm for unsupervised neural networks},
  journal = {Science},
  volume  = {268},
  number  = {5214},
  pages   = {1158--1161},
  year    = {1995},
}

@inproceedings{RBM4CollaborativeFiltering-Salakhutdinov+2007,
  author    = {Salakhutdinov, Ruslan and Mnih, Andriy and Hinton, Geoffrey},
  title     = {Restricted {Boltzmann} machines for collaborative filtering},
  booktitle = ICML,
  pages     = {791--798},
  year      = {2007},
}

@inproceedings{RBM4FeatureLearn-Coates+2011,
  author    = {Coates, Adam and Ng, Andrew Y. and Lee, Honglak},
  title     = {An analysis of single-layer networks in unsupervised feature learning},
  booktitle = AISTATS,
  pages     = {215--223},
  year      = {2011},
}

@inproceedings{RBM4TopicModel-Hinton:2009,
  author    = {Hinton, Geoffrey E. and Salakhutdinov, Ruslan},
  title     = {Replicated softmax: an undirected topic model},
  booktitle = NIPS,
  pages     = {1607--1614},
  year      = {2009},
}

@article{RBM4TTS-Ling+2013,
  author    = {Ling, Zhen-Hua and Deng, Li and Yu, Dong},
  title     = {Modeling spectral envelopes using restricted {Boltzmann} machines and deep belief networks for statistical parametric speech synthesis},
  journal   = TransASLP,
  volume    = {21},
  number    = {10},
  pages     = {2129--2139},
  year      = {2013},
  publisher = {IEEE},
}

@techreport{RBM-Proposed-Smolensky1986,
  author      = {Smolensky, Paul},
  title       = {Information processing in dynamical systems: Foundations of harmony theory},
  institution = {Department of Computer Science, University of Colorado, Boulder},
  year        = {1986},
}

@article{AcceleratedSGD-Nesterov1983,
  author  = {Nesterov, Yurii},
  title   = {A method of solving a convex programming problem with convergence rate {$O(1/k^2)$}},
  journal = {Soviet Mathematics Doklady},
  volume  = {27},
  number  = {2},
  pages   = {372--376},
  year    = {1983},
}

@inproceedings{SGD-CNTK-Guenter+2013,
  author    = {Guenter, Brian and Yu, Dong and Eversole, Adam and Kuchaiev, Oleksii and Seltzer, Michael L.},
  title     = {Stochastic Gradient Descent Algorithm in the Computational Network Toolkit},
  booktitle = NIPS # { Workshop on Optimization for Machine Learning},
  year      = {2013},
}

@article{ConjugateGradient-Hestenes:1952,
  author  = {Hestenes, Magnus Rudolph and Stiefel, Eduard},
  title   = {Methods of conjugate gradients for solving linear systems},
  journal = {Journal of Research of the National Bureau of Standards},
  volume  = {49},
  number  = {6},
  pages   = {409--436},
  year    = {1952},
}

@article{SimulatedAnnealing-Kirkpatrick+1983,
  author  = {Kirkpatrick, Scott and Gelatt, Jr., C. Daniel and Vecchi, Mario P.},
  title   = {Optimization by simulated annealing},
  journal = {Science},
  volume  = {220},
  number  = {4598},
  pages   = {671--680},
  year    = {1983},
}

@incollection{SGD-Bottou1998,
  author    = {Bottou, L{\'e}on},
  title     = {Online learning and stochastic approximations},
  booktitle = {On-line Learning in Neural Networks},
  editor    = {Saad, David},
  volume    = {17},
  pages     = {9--42},
  publisher = {Cambridge University Press},
  year      = {1998},
}

@article{L-BFGS-Liu:1989,
  author  = {Liu, Dong C. and Nocedal, Jorge},
  title   = {On the limited memory {BFGS} method for large scale optimization},
  journal = {Mathematical Programming},
  volume  = {45},
  number  = {1--3},
  pages   = {503--528},
  year    = {1989},
}

@article{AdaGrad-Duchi+2011,
  author  = {Duchi, John and Hazan, Elad and Singer, Yoram},
  title   = {Adaptive subgradient methods for online learning and stochastic optimization},
  journal = JMLR,
  volume  = {12},
  pages   = {2121--2159},
  year    = {2011},
}

@inproceedings{CMN-Liu+1993,
  author    = {Liu, Fu-Hua and Stern, Richard M. and Huang, Xuedong and Acero, Alejandro},
  title     = {Efficient cepstral normalization for robust speech recognition},
  booktitle = WHLT,
  pages     = {69--74},
  year      = {1993},
}

@inproceedings{BayesianOptimization-Snoek+2012,
  author        = {Snoek, Jasper and Larochelle, Hugo and Adams, Ryan P.},
  title         = {Practical {Bayesian} optimization of machine learning algorithms},
  booktitle     = NIPS,
  pages         = {2951--2959},
  year          = {2012},
  eprint        = {1206.2944},
  archiveprefix = {arXiv},
}

@incollection{PracticalTrainDeep-bengio2012,
  author    = {Bengio, Yoshua},
  title     = {Practical recommendations for gradient-based training of deep architectures},
  booktitle = {Neural Networks: Tricks of the Trade},
  publisher = {Springer},
  pages     = {437--478},
  year      = {2012},
}

@article{MLP-UniversalApproximator-Hornik+1989,
  author  = {Hornik, Kurt and Stinchcombe, Maxwell and White, Halbert},
  title   = {Multilayer feedforward networks are universal approximators},
  journal = {Neural Networks},
  volume  = {2},
  number  = {5},
  pages   = {359--366},
  year    = {1989},
}

@inproceedings{SparseRectifierNN-Glorot+2011,
  author    = {Glorot, Xavier and Bordes, Antoine and Bengio, Yoshua},
  title     = {Deep Sparse Rectifier Neural Networks},
  booktitle = AISTAT,
  volume    = {15},
  pages     = {315--323},
  year      = {2011},
}

@incollection{EfficientBackprop-Lecun+1998,
  author    = {LeCun, Yann and Bottou, L{\'e}on and Orr, Genevieve B. and M{\"u}ller, Klaus-Robert},
  title     = {Efficient backprop},
  booktitle = {Neural Networks: Tricks of the Trade},
  publisher = {Springer},
  pages     = {9--50},
  year      = {1998},
}

@inproceedings{WordSenseDisambiguation-Yuret2007,
  author    = {Yuret, Deniz},
  title     = {{KU}: Word sense disambiguation by substitution},
  booktitle = SemEval,
  pages     = {207--213},
  year      = {2007},
}

@inproceedings{ChunkingWithSVM-Kudo:2001,
  author    = {Kudo, Taku and Matsumoto, Yuji},
  title     = {Chunking with support vector machines},
  booktitle = NAACL,
  pages     = {1--8},
  year      = {2001},
}

@inproceedings{JointLMTMWithRNN-Auli+2013,
  author    = {Auli, Michael and Galley, Michel and Quirk, Chris and Zweig, Geoffrey},
  title     = {Joint language and translation modeling with recurrent neural networks},
  booktitle = EMNLP,
  year      = {2013},
}

@article{t-SNE-Van:2008,
  author  = {van der Maaten, Laurens and Hinton, Geoffrey},
  title   = {Visualizing data using {t-SNE}},
  journal = JMLR,
  volume  = {9},
  pages   = {2579--2605},
  year    = {2008},
}

@inproceedings{FeatEngInDNN-Seide+2011,
  author    = {Seide, Frank and Li, Gang and Chen, Xie and Yu, Dong},
  title     = {Feature engineering in context-dependent deep neural networks for conversational speech transcription},
  booktitle = ASRU,
  pages     = {24--29},
  year      = {2011},
}

@inproceedings{FeatureLearnInDNN-Yu+2013,
  author    = {Yu, Dong and Seltzer, Michael L. and Li, Jinyu and Huang, Jui-Ting and Seide, Frank},
  title     = {Feature Learning in Deep Neural Networks - Studies on Speech Recognition Tasks},
  booktitle = ICLR,
  year      = {2013},
}

@inproceedings{ATIS3Corpus-Dahl+1994,
  author    = {Dahl, Deborah A. and Bates, Madeleine and Brown, Michael and Fisher, William and Hunicke-Smith, Kate and Pallett, David and Pao, Christine and Rudnicky, Alexander and Shriberg, Elizabeth},
  title     = {Expanding the scope of the {ATIS} task: The {ATIS}-3 corpus},
  booktitle = WHLT,
  pages     = {43--48},
  year      = {1994},
}

@article{NLPFromScratch-Collobert+2011,
  author  = {Collobert, Ronan and Weston, Jason and Bottou, L{\'e}on and Karlen, Michael and Kavukcuoglu, Koray and Kuksa, Pavel},
  title   = {Natural language processing (almost) from scratch},
  journal = JMLR,
  volume  = {12},
  pages   = {2493--2537},
  year    = {2011},
}

@inproceedings{LexicalSubstitution-Giuliano+2007,
  title     = {{FBK}-irst: Lexical Substitution Task Exploiting Domain and Syntagmatic Coherence},
  author    = {Claudio Giuliano and Alfio Gliozzo and Carlo Strapparava},
  booktitle = SemEval,
  pages     = {145--148},
  year      = {2007},
}

@article{DiscriminativeLM-Arisoy+2012,
  author  = {Arisoy, Ebru and Sara{\c{c}}lar, Murat and Roark, Brian and Shafran, Izhak},
  title   = {Discriminative language modeling with linguistic and statistically derived features},
  journal = TransASLP,
  volume  = {20},
  number  = {2},
  pages   = {540--550},
  year    = {2012},
}

@article{LMClustering-Martin+1998,
  author  = {Martin, Sven and Liermann, J{\"o}rg and Ney, Hermann},
  title   = {Algorithms for bigram and trigram word clustering},
  journal = {Speech Communication},
  volume  = {24},
  number  = {1},
  pages   = {19--37},
  year    = {1998},
}

@article{MaxEntLM-Khudanpur:2000,
  author  = {Khudanpur, Sanjeev and Wu, Jun},
  title   = {Maximum entropy techniques for exploiting syntactic, semantic and collocational dependencies in language modeling},
  journal = CSL,
  volume  = {14},
  number  = {4},
  pages   = {355--372},
  year    = {2000},
}

@inproceedings{NNLM4LVSR-Schwenk:2002,
  author    = {Schwenk, Holger and Gauvain, Jean-Luc},
  title     = {Connectionist language modeling for large vocabulary continuous speech recognition},
  booktitle = ICASSP,
  volume    = {1},
  pages     = {I-765--I-768},
  year      = {2002},
}

@article{PennTreebank-Marcus+1993,
  author  = {Marcus, Mitchell P. and Marcinkiewicz, Mary Ann and Santorini, Beatrice},
  title   = {Building a large annotated corpus of {English}: The {Penn Treebank}},
  journal = {Computational Linguistics},
  volume  = {19},
  number  = {2},
  pages   = {313--330},
  year    = {1993},
}

@inproceedings{NNLM-LongRangeDependency-Hai+2012,
  author    = {Le, Hai Son and Allauzen, Alexandre and Yvon, Fran{\c{c}}ois},
  title     = {Measuring the influence of long range dependencies with neural network language models},
  booktitle = {Proc. {NAACL-HLT} 2012 Workshop: Will We Ever Really Replace the N-gram Model? On the Future of Language Modeling for {HLT}},
  pages     = {1--10},
  year      = {2012},
}

@inproceedings{ATIS-Hemphill+1990,
  author    = {Hemphill, Charles T. and Godfrey, John J. and Doddington, George R.},
  title     = {The {ATIS} spoken language systems pilot corpus},
  booktitle = DARPASNLW,
  pages     = {96--101},
  year      = {1990},
}

@inproceedings{LMInGPU4SMT-Schwenk+2012,
  author    = {Schwenk, Holger and Rousseau, Anthony and Attik, Mohammed},
  title     = {Large, pruned or continuous space language models on a {GPU} for statistical machine translation},
  booktitle = {Proc. {NAACL-HLT} Workshop: Will We Ever Really Replace the N-gram Model? On the Future of Language Modeling for {HLT}},
  pages     = {11--19},
  year      = {2012},
}

@inproceedings{EvalSLTWithATIS-Price1990,
  author    = {Price, Patti},
  title     = {Evaluation of spoken language systems: The {ATIS} domain},
  booktitle = DARPASNLW,
  pages     = {91--95},
  year      = {1990},
}

@inproceedings{CMU-AirTravelSystem-Ward+1990,
  author    = {Ward, Wayne},
  title     = {The {CMU} air travel information service: Understanding spontaneous speech},
  booktitle = DARPASNLW,
  pages     = {127--129},
  year      = {1990},
}

@inproceedings{DataDrivenSLU-He:2003,
  author    = {He, Yulan and Young, Steve},
  title     = {A data-driven spoken language understanding system},
  booktitle = ASRU,
  pages     = {583--588},
  year      = {2003},
}

@inproceedings{GenerativeDiscriminative-SLU-Raymond:2007,
  author    = {Raymond, Christian and Riccardi, Giuseppe},
  title     = {Generative and discriminative algorithms for spoken language understanding},
  booktitle = INTERSPEECH,
  pages     = {1605--1608},
  year      = {2007},
}

@inproceedings{CombineStatisticalAndKnowledgeInSLU-Wang+2006,
  author    = {Wang, Ye-Yi and Acero, Alex and Mahajan, Milind and Lee, John},
  title     = {Combining statistical and knowledge-based spoken language understanding in conditional models},
  booktitle = COLING,
  pages     = {882--889},
  year      = {2006},
}

@inproceedings{SLUSurvey-De2007,
  author    = {De Mori, Renato},
  title     = {Spoken language understanding: A survey},
  booktitle = ASRU,
  pages     = {365--376},
  year      = {2007},
}

@inproceedings{SLU-Survey-Bechet2008,
  author    = {B{\'e}chet, Fr{\'e}d{\'e}ric},
  title     = {Processing spontaneous speech in deployed spoken language understanding systems: a survey},
  booktitle = SLT,
  year      = {2008},
}

@inproceedings{SLU-Kernal-Moschitti+2007,
  author    = {Moschitti, Alessandro and Riccardi, Giuseppe and Raymond, Christian},
  title     = {Spoken language understanding with kernels for syntactic/semantic structures},
  booktitle = ASRU,
  pages     = {183--188},
  year      = {2007},
}

@inproceedings{ContinousLM4SMT-Schwenk2006,
  author    = {Schwenk, Holger and D{\'e}chelotte, Daniel and Gauvain, Jean-Luc},
  title     = {Continuous space language models for statistical machine translation},
  booktitle = COLING,
  pages     = {723--730},
  year      = {2006},
}

@inproceedings{CD-RNN-LM-Mikolov:2012,
  author    = {Mikolov, Tomas and Zweig, Geoffrey},
  title     = {Context dependent recurrent neural network language model},
  booktitle = SLT,
  pages     = {234--239},
  year      = {2012},
}

@inproceedings{Extensions-RNN-LM-Mikolov:2011,
  author    = {Mikolov, Tomas and Kombrink, Stefan and Burget, Lukas and Cernocky, Jan Honza and Khudanpur, Sanjeev},
  title     = {Extensions of recurrent neural network language model},
  booktitle = ICASSP,
  year      = {2011},
}

@inproceedings{SpeedRegularization-Zweig:2013,
  author    = {Zweig, Geoffrey and Makarychev, Konstantin},
  title     = {Speed regularization and optimality in word classing},
  booktitle = ICASSP,
  year      = {2013},
}

@inproceedings{RNNLMWithLinguisticFeature-Shi+2012,
  author    = {Shi, Yangyang and Wiggers, Pascal and Jonker, Catholijn M.},
  title     = {Towards Recurrent Neural Networks Language Models with Linguistic and Contextual Features},
  booktitle = INTERSPEECH,
  year      = {2012},
}

@inproceedings{ClassBasedLM-Kneser:1993,
  author    = {Kneser, Reinhard and Ney, Hermann},
  title     = {Improved Clustering Techniques for Class-Based Statistical Language Modeling},
  booktitle = EUROSPEECH,
  year      = {1993},
}

@article{ContinuousLM-Schwenk2007,
  author  = {Schwenk, Holger},
  title   = {Continuous space language models},
  journal = CSL,
  volume  = {21},
  number  = {3},
  pages   = {492--518},
  year    = {2007},
}

@incollection{NNLM-Bengio+2006,
  author    = {Bengio, Yoshua and Schwenk, Holger and Sen{\'e}cal, Jean-S{\'e}bastien and Morin, Fr{\'e}deric and Gauvain, Jean-Luc},
  title     = {Neural probabilistic language models},
  booktitle = {Innovations in Machine Learning},
  publisher = {Springer},
  pages     = {137--186},
  year      = {2006},
}

@inproceedings{StructuredOutlayer-NNLM-Le+2011,
  author    = {Le, Hai-Son and Oparin, Ilya and Allauzen, Alexandre and Gauvain, Jean-Luc and Yvon, Fran{\c{c}}ois},
  title     = {Structured output layer neural network language model},
  booktitle = ICASSP,
  pages     = {5524--5527},
  year      = {2011},
}

@inproceedings{Hierarchical-NNLM-Morin:2005,
  author    = {Morin, Frederic and Bengio, Yoshua},
  title     = {Hierarchical probabilistic neural network language model},
  booktitle = AISTAT,
  pages     = {246--252},
  year      = {2005},
}

@inproceedings{CombineKnowledge4Lexical-Hassan+2007,
  title     = {{UNT}: {SubFinder}: Combining Knowledge Sources for Automatic Lexical Substitution},
  author    = {Samer Hassan and Andras Csomai and Carmen Banea and Ravi Sinha and Rada Mihalcea},
  booktitle = SemEval,
  series    = {SemEval '07},
  pages     = {410--413},
  year      = {2007},
}

@inproceedings{WordPairAntonymy-Mohammad+2008,
  author    = {Mohammad, Saif and Dorr, Bonnie and Hirst, Graeme},
  title     = {Computing word-pair antonymy},
  booktitle = EMNLP,
  pages     = {982--991},
  year      = {2008},
}

@inproceedings{LexicalSubstitution-Mccarthy:2007,
  author    = {McCarthy, Diana and Navigli, Roberto},
  title     = {{SemEval}-2007 task 10: {English} lexical substitution task},
  booktitle = SemEval,
  pages     = {48--53},
  year      = {2007},
}

@inproceedings{ModelM4VoiceSearch-Zweig:2011,
  author    = {Zweig, Geoffrey and Chang, Shuangyu},
  title     = {Personalizing {Model M} for Voice-Search},
  booktitle = INTERSPEECH,
  pages     = {609--612},
  year      = {2011},
}

@misc{Web5Gram-Brants:2006,
  author       = {Brants, Thorsten and Franz, Alex},
  title        = {Web {1T} 5-gram Version 1},
  howpublished = {Linguistic Data Consortium},
  year         = {2006},
}

@techreport{TopicDetectionAndLMAdapt-Seymore:1997,
  author      = {Seymore, Kristie and Rosenfeld, Ronald},
  title       = {Large-scale topic detection and language model adaptation},
  institution = {School of Computer Science, Carnegie Mellon University},
  number      = {CMU-CS-97-152},
  year        = {1997},
}

@techreport{MeasureSemanticOpposition-Mohammad+2011,
  author      = {Mohammad, Saif M. and Dorr, Bonnie J. and Hirst, Graeme and Turney, Peter D.},
  title       = {Measuring degrees of semantic opposition},
  institution = {National Research Council Canada},
  year        = {2011},
}

@techreport{MSR-SentenceCompletionChallenge-Zweig:2011,
  author      = {Zweig, Geoffrey and Burges, Christopher J. C.},
  title       = {The {Microsoft Research} sentence completion challenge},
  institution = {Microsoft Research},
  number      = {MSR-TR-2011-129},
  year        = {2011},
}

@inproceedings{Variable-Component-Deep-Neural-Network:2014,
  author    = {Zhao, Rui and Li, Jinyu and Gong, Yifan},
  title     = {Variable-Component Deep Neural Network for Robust Speech Recognition},
  booktitle = INTERSPEECH,
  month     = sep,
  year      = {2014},
  url       = {http://research.microsoft.com/apps/pubs/default.aspx?id=230079},
}

@inproceedings{RetrieveSimilarWords-Lin1998,
  author    = {Lin, Dekang},
  title     = {Automatic retrieval and clustering of similar words},
  booktitle = COLING,
  pages     = {768--774},
  year      = {1998},
}

@inproceedings{CMU-LM-Toolkit-Clarkson:1997,
  author    = {Clarkson, Philip and Rosenfeld, Ronald},
  title     = {Statistical language modeling using the {CMU-Cambridge} toolkit},
  booktitle = EUROSPEECH,
  pages     = {2707--2710},
  year      = {1997},
}

@inproceedings{IntegrateSemanticInLM-Coccaro:1998,
  author    = {Coccaro, Noah and Jurafsky, Daniel},
  title     = {Towards better integration of semantic predictors in statistical language modeling},
  booktitle = ICSLP,
  year      = {1998},
}

@inproceedings{GenTextWithRNN-Sutskever+2011,
  author    = {Sutskever, Ilya and Martens, James and Hinton, Geoffrey E.},
  title     = {Generating text with recurrent neural networks},
  booktitle = ICML,
  pages     = {1017--1024},
  year      = {2011},
}

@inproceedings{ParseLanguageWithRNN-Socher+2011,
  author    = {Socher, Richard and Lin, Cliff C. and Ng, Andrew and Manning, Chris},
  title     = {Parsing natural scenes and natural language with recursive neural networks},
  booktitle = ICML,
  pages     = {129--136},
  year      = {2011},
}

@inproceedings{SolveLogicPuzzle-Lev+2004,
  title     = {Solving logic puzzles: from robust processing to precise semantics},
  author    = {Lev, Iddo and MacCartney, Bill and Manning, Christopher D. and Levy, Roger},
  booktitle = {Proc. ACL Workshop on Text Meaning and Interpretation},
  pages     = {9--16},
  year      = {2004},
}

@inproceedings{DeepRead-Hirschman+1999,
  author    = {Hirschman, Lynette and Light, Marc and Breck, Eric and Burger, John D.},
  title     = {{Deep Read}: A reading comprehension system},
  booktitle = ACL,
  pages     = {325--332},
  year      = {1999},
}

@inproceedings{ReadingComprehension-Charniak+2000,
  author    = {Charniak, Eugene and Altun, Yasemin and de Salvo Braz, Rodrigo and Garrett, Benjamin and Kosmala, Margaret and Moscovich, Tomer and Pang, Lixin and Pyo, Changhee and Sun, Ye and Wy, Wei and Yang, Zhongfa and Zeller, Shawn and Zorn, Lisa},
  title     = {Reading comprehension programs in a statistical-language-processing class},
  booktitle = {Proc. {ANLP/NAACL} Workshop on Reading comprehension tests as evaluation for computer-based language understanding systems},
  pages     = {1--5},
  year      = {2000},
}

@inproceedings{RuleBasedQA-Riloff:2000,
  author    = {Riloff, Ellen and Thelen, Michael},
  title     = {A rule-based question answering system for reading comprehension tests},
  booktitle = {Proc. {ANLP/NAACL} Workshop on Reading comprehension tests as evaluation for computer-based language understanding systems},
  pages     = {13--19},
  year      = {2000},
}

@inproceedings{NLUWithSMT-Macherey+2001,
  author    = {Macherey, Klaus and Och, Franz Josef and Ney, Hermann},
  title     = {Natural language understanding using statistical machine translation},
  booktitle = INTERSPEECH,
  pages     = {2205--2208},
  year      = {2001},
}

@inproceedings{DeepConvexNet4SLU-Deng+2012,
  author    = {Deng, Li and Tur, Gokhan and He, Xiaodong and Hakkani-Tur, Dilek},
  title     = {Use of kernel deep convex networks and end-to-end learning for spoken language understanding},
  booktitle = SLT,
  pages     = {210--215},
  year      = {2012},
}

@inproceedings{SimplifySentence4SLU-Tur+2011,
  author    = {Tur, Gokhan and Hakkani-Tur, Dilek and Heck, Larry and Parthasarathy, Sarangarajan},
  title     = {Sentence simplification for spoken language understanding},
  booktitle = ICASSP,
  pages     = {5628--5631},
  year      = {2011},
}

@inproceedings{FSA+NN4SLU-Wutiwiwatchai:2003,
  author    = {Wutiwiwatchai, Chai and Furui, Sadaoki},
  title     = {Combination of finite state automata and neural network for spoken language understanding},
  booktitle = INTERSPEECH,
  year      = {2003},
}

@inproceedings{QA-Wang+2000,
  author    = {Wang, W. and Auer, J. and Parasuraman, R. and Zubarev, I. and Brandyberry, D. and Harper, M. P.},
  title     = {A question answering system developed as a project in a natural language processing course},
  booktitle = {Proc. {ANLP/NAACL} Workshop on Reading comprehension tests as evaluation for computer-based language understanding systems},
  pages     = {28--35},
  year      = {2000},
}

@inproceedings{ML4QA-Ng+2000,
  title     = {A machine learning approach to answering questions for reading comprehension tests},
  author    = {Ng, Hwee Tou and Teo, Leong Hwee and Kwan, Jennifer Lai Pheng},
  booktitle = EMNLP,
  pages     = {124--132},
  year      = {2000},
}

@inproceedings{LearnKnowledgeBySVD-Laham:1998,
  author    = {Landauer, Thomas K. and Laham, Darrell and Foltz, Peter},
  title     = {Learning human-like knowledge by singular value decomposition: A progress report},
  booktitle = NIPS,
  volume    = {10},
  pages     = {45--51},
  publisher = {MIT Press},
  year      = {1998},
}

@inproceedings{CRF4MovieSearch-Liu+2012,
  author    = {Liu, Jingjing and Cyphers, Scott and Pasupat, Panupong and McGraw, Ian and Glass, Jim},
  title     = {A Conversational Movie Search System Based on Conditional Random Fields},
  booktitle = INTERSPEECH,
  year      = {2012},
}

@article{LatentSemanticInLM-Bellegarda2000,
  author  = {Bellegarda, Jerome R.},
  title   = {Exploiting latent semantic information in statistical language modeling},
  journal = {Proceedings of the IEEE},
  volume  = {88},
  number  = {8},
  pages   = {1279--1296},
  year    = {2000},
}

@article{ConversationalInterface-Zue:2000,
  author  = {Zue, Victor W. and Glass, James R.},
  title   = {Conversational interfaces: Advances and challenges},
  journal = {Proceedings of the IEEE},
  volume  = {88},
  number  = {8},
  pages   = {1166--1180},
  year    = {2000},
}

@book{Intro2IR-Salton:83,
  title     = {Introduction to Modern Information Retrieval},
  author    = {Gerard Salton and Michael J. McGill},
  year      = 1983,
  publisher = {McGraw Hill},
}

@book{LexicalSemantics-Cruse1986,
  author    = {Cruse, D. Alan},
  title     = {Lexical semantics},
  publisher = {Cambridge University Press},
  year      = {1986},
}

@book{ModernIR-BaezaYates:99,
  author    = {Baeza-Yates, Ricardo and Ribeiro-Neto, Berthier},
  title     = {Modern Information Retrieval},
  publisher = {Addison-Wesley},
  year      = 1999,
}

@inproceedings{TranslinguaDocRepFromDiscriminativeProj-Platt+2010,
  author    = {Platt, John and Toutanova, Kristina and Yih, Wen-tau},
  title     = {Translingual Document Representations from Discriminative Projections},
  booktitle = EMNLP,
  month     = oct,
  year      = {2010},
  pages     = {251--261},
}

@misc{RNNToolkit-Mikolov2012,
  author       = {Mikolov, Tomas},
  title        = {{RNNToolkit}},
  howpublished = {\texttt{http://www.fit.vutbr.cz/\textasciitilde{}imikolov/rnnlm/}},
  year         = {2012},
  url          = {http://www.fit.vutbr.cz/~imikolov/rnnlm/},
}

@misc{ImageNet,
  key          = {ImageNet},
  title        = {{ImageNet}},
  howpublished = {\texttt{http://www.image-net.org/}},
  url          = {http://www.image-net.org/},
}

@inproceedings{DiscriminativeProjections4SimilarityMeasure-Yih+2011,
  author    = {Yih, Wen-tau and Toutanova, Kristina and Platt, John C. and Meek, Christopher},
  title     = {Learning Discriminative Projections for Text Similarity Measures},
  booktitle = {Proc. {ACL} Conference on Computational Natural Language Learning},
  month     = jun,
  year      = {2011},
  pages     = {247--256},
}

@book{NumericalOptimization-Nocedal:2006,
  author    = {Nocedal, Jorge and Wright, Stephen},
  title     = {Numerical Optimization},
  publisher = {Springer},
  edition   = {Second},
  year      = {2006},
}

@article{latentSemanticAnalysis-Deerwester+1990,
  author  = {Deerwester, Scott C. and Dumais, Susan T. and Landauer, Thomas K. and Furnas, George W. and Harshman, Richard A.},
  title   = {Indexing by latent semantic analysis},
  journal = JASIS,
  volume  = {41},
  number  = {6},
  pages   = {391--407},
  year    = {1990},
}

@inproceedings{DocClusterWithNMF-Xu+2003,
  title     = {Document clustering based on non-negative matrix factorization},
  author    = {Xu, Wei and Liu, Xin and Gong, Yihong},
  booktitle = SIGIR,
  location  = {Toronto, Canada},
  pages     = {267--273},
  year      = {2003},
}

@inproceedings{CombineModules4MultiChoiceProblem-Turney+2003,
  author    = {Turney, Peter and Littman, Michael L. and Bigham, Jeffrey and Shnayder, Victor},
  title     = {Combining independent modules to solve multiple-choice synonym and analogy problems},
  booktitle = RANLP,
  pages     = {482--489},
  year      = {2003},
}

@inproceedings{MineWeb4Synonym-Turney2001,
  author    = {Turney, Peter},
  title     = {Mining the {Web} for Synonyms: {PMI-IR} versus {LSA} on {TOEFL}},
  booktitle = ECML,
  pages     = {491--502},
  year      = {2001},
}

@article{IntroLatentSemanticAnalysis-Landauer+1998,
  author  = {Landauer, Thomas K. and Foltz, Peter W. and Laham, Darrell},
  title   = {An introduction to latent semantic analysis},
  journal = {Discourse Processes},
  volume  = {25},
  number  = {2--3},
  pages   = {259--284},
  year    = {1998},
}

@article{BasisOfLearnAndCognitionFromLSA-Landauer2002,
  author  = {Landauer, Thomas K.},
  title   = {On the computational basis of learning and cognition: Arguments from {LSA}},
  journal = {Psychology of Learning and Motivation},
  volume  = {41},
  pages   = {43--84},
  year    = {2002},
}

@inproceedings{ConceptualVectors-Schwab+2002,
  author    = {Schwab, Didier and Lafourcade, Mathieu and Prince, Violaine},
  title     = {Antonymy and conceptual vectors},
  booktitle = COLING,
  pages     = {1--7},
  year      = {2002},
}
